From 28e37a37fdd44d9eedfe531e5fcad3a0ac6abed5 Mon Sep 17 00:00:00 2001
From: Aman Dwivedi <aman.dwivedi5@gmail.com>
Date: Wed, 30 Dec 2020 14:10:12 +0530
Subject: [PATCH] refactor(argparse): Switch CLI parsing from argparse to plac

---
 MANIFEST.in                                |  3 +-
 README.md                                  |  2 +-
 atarashi/agents/atarashiAgent.py           |  2 +-
 atarashi/agents/cosineSimNgram.py          | 32 +++++-----
 atarashi/agents/dameruLevenDist.py         | 28 ++++-----
 atarashi/agents/tfidf.py                   | 30 ++++------
 atarashi/agents/wordFrequencySimilarity.py | 27 ++++-----
 atarashi/atarashii.py                      | 70 ++++++++++------------
 atarashi/build_deps.py                     | 21 +++----
 atarashi/evaluator/evaluator.py            | 50 ++++++++--------
 atarashi/imtihaan.py                       | 36 +++++------
 atarashi/libs/commentPreprocessor.py       | 28 ++++-----
 atarashi/libs/initialmatch.py              |  2 +-
 atarashi/libs/license_clustering.py        | 19 +++---
 atarashi/libs/ngram.py                     | 29 ++++-----
 atarashi/libs/utils.py                     |  2 +-
 atarashi/license/licenseDownloader.py      | 21 ++++---
 atarashi/license/licenseLoader.py          |  2 +-
 atarashi/license/licensePreprocessor.py    | 24 ++++----
 atarashi/license/license_merger.py         | 22 +++----
 pyproject.toml                             |  3 +-
 requirements.txt                           |  3 +-
 setup.py                                   |  2 +-
 23 files changed, 211 insertions(+), 247 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index 0957c605..eca21c5a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -14,5 +14,4 @@ include atarashi/data/Ngram_keywords.json
 
 prune .git
 prune venv
-prune test*
-
+prune test*
\ No newline at end of file
diff --git a/README.md b/README.md
index 8ed5f854..2d527ed6 100644
--- a/README.md
+++ b/README.md
@@ -195,4 +195,4 @@ This will generate file in `docs/_build/html`. Go to: index.html
 
 You can change the theme of the documentation by changing `html_theme` in config.py file in `docs/` folder.
 You can choose from {'alabaster', 'classic', 'sphinxdoc', 'scrolls', 'agogo', 'traditional', 'nature', 'haiku', 'pyramid', 'bizstyle'}
-[Reference](https://www.sphinx-doc.org/en/master/usage/theming.html)
+[Reference](https://www.sphinx-doc.org/en/master/usage/theming.html)
\ No newline at end of file
diff --git a/atarashi/agents/atarashiAgent.py b/atarashi/agents/atarashiAgent.py
index 46332e8d..75bc11bd 100644
--- a/atarashi/agents/atarashiAgent.py
+++ b/atarashi/agents/atarashiAgent.py
@@ -72,4 +72,4 @@ def exactMatcher(licenseText, licenses):
       output.append(licenses.iloc[idx]['shortname'])
   if not output:
     return -1
-  return output
+  return output
\ No newline at end of file
diff --git a/atarashi/agents/cosineSimNgram.py b/atarashi/agents/cosineSimNgram.py
index acc6b391..a3d44d86 100644
--- a/atarashi/agents/cosineSimNgram.py
+++ b/atarashi/agents/cosineSimNgram.py
@@ -18,7 +18,7 @@
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-import argparse
+import plac
 from enum import Enum
 import itertools
 import json
@@ -184,24 +184,16 @@ def setSimAlgo(self, newAlgo):
       self.simType = newAlgo
 
 
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("processedLicenseList", help="Specify the processed license list file")
-  parser.add_argument("ngramJson", help="Specify the location of NGRAM JSON")
-  parser.add_argument("inputFile", help="Specify the input file which needs to be scanned")
-  parser.add_argument("-s", "--similarity", required=False, default="BigramCosineSim",
-                      choices=["CosineSim", "DiceSim", "BigramCosineSim"],
-                      help="Specify the similarity algorithm that you want")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action='count', default=0)
-  args = parser.parse_args()
-
-  licenseList = args.processedLicenseList
-  ngramJsonLoc = args.ngramJson
-  inputFile = args.inputFile
-  simType = args.similarity
-  verbose = args.verbose
+@plac.annotations(
+  licenseList = plac.Annotation("Specify the processed license list file", "positional", None, str, metavar="processedLicenseList"),
+  ngramJsonLoc = plac.Annotation("Specify the location of NGRAM JSON", metavar="ngramJson"),
+  inputFile = plac.Annotation("Specify the input file which needs to be scanned"),
+  similarity = plac.Annotation("Specify the similarity algorithm that you want", "option", "s", str, ["CosineSim", "DiceSim", "BigramCosineSim"], metavar="{CosineSim,DiceSim,BigramCosineSim}"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")  
+)
 
+def main(licenseList, ngramJsonLoc, inputFile, similarity="BigramCosineSim", verbose=False):
+  simType = similarity
   scanner = NgramAgent(licenseList, ngramJson=ngramJsonLoc, verbose=verbose)
   if simType == "CosineSim":
     scanner.setSimAlgo(NgramAgent.NgramAlgo.cosineSim)
@@ -215,3 +207,7 @@ def setSimAlgo(self, newAlgo):
     print("N-Gram identifier and " + str(simType) + " is " + str(result))
   else:
     print("Result is nothing")
+
+
+if __name__ == "__main__":
+  plac.call(main)
\ No newline at end of file
diff --git a/atarashi/agents/dameruLevenDist.py b/atarashi/agents/dameruLevenDist.py
index aa6b0351..e1ea27ed 100644
--- a/atarashi/agents/dameruLevenDist.py
+++ b/atarashi/agents/dameruLevenDist.py
@@ -19,7 +19,7 @@
 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
 
-import argparse
+import plac
 import sys
 
 from pyxdameraulevenshtein import damerau_levenshtein_distance
@@ -62,18 +62,18 @@ def scan(self, filePath):
       return temp[0]
 
 
-if __name__ == "__main__":
-  print("The file has been run directly")
-  parser = argparse.ArgumentParser()
-  parser.add_argument("inputFile", help="Specify the input file which needs to be scanned")
-  parser.add_argument("processedLicenseList",
-                      help="Specify the processed license list file which contains licenses")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action="count", default=0)
-  args = parser.parse_args()
-  filename = args.inputFile
-  licenseList = args.processedLicenseList
-  verbose = args.verbose
+@plac.annotations(
+  filename = plac.Annotation("Specify the input file which needs to be scanned", metavar="inputFile"),
+  licenseList = plac.Annotation("Specify the processed license list file which contains licenses", "positional", None, str, metavar="processedLicenseList"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")  
+)
 
+
+def main(filename, licenseList, verbose=False):
+  print("The file has been run directly")
   scanner = DameruLevenDist(licenseList, verbose=verbose)
-  print("License Detected using Dameru Leven Distance: " + str(scanner.scan(filename)))
+  print("License Detected using Dameru Leven Distance: " + str(scanner.scan(filename)))  
+
+
+if __name__ == "__main__":
+  plac.call(main)
\ No newline at end of file
diff --git a/atarashi/agents/tfidf.py b/atarashi/agents/tfidf.py
index abfa3af1..07e676f3 100644
--- a/atarashi/agents/tfidf.py
+++ b/atarashi/agents/tfidf.py
@@ -22,7 +22,7 @@
 __author__ = "Aman Jain"
 __email__ = "amanjain5221@gmail.com"
 
-import argparse
+import plac
 from enum import Enum
 import itertools
 import time
@@ -151,24 +151,15 @@ def setSimAlgo(self, newAlgo):
       self.algo = newAlgo
 
 
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("-s", "--tfidf_similarity", required=False,
-                      default="ScoreSim",
-                      choices=["CosineSim", "ScoreSim"],
-                      help="Specify the similarity algorithm that you want")
-  parser.add_argument("inputFile", help="Specify the input file which needs to be scanned")
-  parser.add_argument("processedLicenseList",
-                      help="Specify the processed license list file which contains licenses")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action="count", default=0)
-  args = parser.parse_args()
-
-  tfidf_similarity = args.tfidf_similarity
-  filename = args.inputFile
-  licenseList = args.processedLicenseList
-  verbose = args.verbose
+@plac.annotations(
+  filename = plac.Annotation("Specify the input file which needs to be scanned", metavar="inputFile"),
+  licenseList = plac.Annotation("Specify the processed license list file which contains licenses", "positional", None, str, metavar="processedLicenseList"),
+  tfidf_similarity = plac.Annotation("Specify the similarity algorithm that you want", "option", "s", str, ["CosineSim", "ScoreSim"], metavar="{CosineSim,ScoreSim}"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")  
+)
+
 
+def main(filename, licenseList, tfidf_similarity="ScoreSim", verbose=False):
   scanner = TFIDF(licenseList, verbose=verbose)
   if tfidf_similarity == "CosineSim":
     scanner.setSimAlgo(TFIDF.TfidfAlgo.cosineSim)
@@ -176,3 +167,6 @@ def setSimAlgo(self, newAlgo):
   else:
     scanner.setSimAlgo(TFIDF.TfidfAlgo.scoreSim)
     print("License Detected using TF-IDF algorithm + sum score " + str(scanner.scan(filename)))
+
+if __name__ == "__main__":
+  plac.call(main)
\ No newline at end of file
diff --git a/atarashi/agents/wordFrequencySimilarity.py b/atarashi/agents/wordFrequencySimilarity.py
index f365af64..fca62064 100644
--- a/atarashi/agents/wordFrequencySimilarity.py
+++ b/atarashi/agents/wordFrequencySimilarity.py
@@ -22,7 +22,7 @@
 __author__ = "Aman Jain"
 __email__ = "amanjain5221@gmail.com"
 
-import argparse
+import plac
 import re
 
 from atarashi.agents.atarashiAgent import AtarashiAgent, exactMatcher
@@ -79,19 +79,18 @@ def scan(self, filePath):
       return temp
 
 
-if __name__ == "__main__":
-  print("The file has been called from main")
-  parser = argparse.ArgumentParser()
-  parser.add_argument("inputFile", help = "Specify the input file which needs to be scanned")
-  parser.add_argument("processedLicenseList",
-                      help = "Specify the processed license list file which contains licenses")
-  parser.add_argument("-v", "--verbose", help = "increase output verbosity",
-                      action = "count", default = 0)
-
-  args = parser.parse_args()
-  filename = args.inputFile
-  licenseList = args.processedLicenseList
-  verbose = args.verbose
+@plac.annotations(
+  filename = plac.Annotation("Specify the input file which needs to be scanned", metavar="inputFile"),
+  licenseList = plac.Annotation("Specify the processed license list file which contains licenses", "positional", None, str, metavar="processedLicenseList"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")  
+)
 
+
+def main(filename, licenseList, verbose=False):
+  print("The file has been called from main")
   scanner = WordFrequencySimilarity(licenseList, verbose = verbose)
   print("The result from Histogram similarity algo is ", scanner.scan(filename))
+
+
+if __name__ == "__main__":
+  plac.call(main)
\ No newline at end of file
diff --git a/atarashi/atarashii.py b/atarashi/atarashii.py
index 2540491a..bb588a2a 100644
--- a/atarashi/atarashii.py
+++ b/atarashi/atarashii.py
@@ -18,7 +18,7 @@
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-import argparse
+import plac
 import os
 import json
 from pkg_resources import resource_filename
@@ -33,7 +33,9 @@
 __version__ = "0.0.11"
 
 
-def atarashii_runner(inputFile, processedLicense, agent_name, similarity="CosineSim", ngramJsonLoc=None, verbose=None):
+
+
+def atarashii_runner(inputFile, agent_name, processedLicense, similarity="CosineSim", ngram_json=None, verbose=None):
   '''
   :param inputFile: Input File for scanning of license
   :param processedLicense: Processed License List (CSV) path (Default path already provided)
@@ -68,7 +70,7 @@ def atarashii_runner(inputFile, processedLicense, agent_name, similarity="Cosine
       print("Please choose similarity from {CosineSim,ScoreSim}")
       return -1
   elif agent_name == "Ngram":
-    scanner = NgramAgent(processedLicense, ngramJson=ngramJsonLoc)
+    scanner = NgramAgent(processedLicense, ngramJson=ngram_json)
     if similarity == "CosineSim":
       scanner.setSimAlgo(NgramAgent.NgramAlgo.cosineSim)
     elif similarity == "DiceSim":
@@ -84,43 +86,31 @@ def atarashii_runner(inputFile, processedLicense, agent_name, similarity="Cosine
   return result
 
 
-def main():
-  '''
-  Calls atarashii_runner for each file in the folder/ repository specified by user
-  Prints the Input file path and the JSON output from atarashii_runner
-  '''
+@plac.annotations(
+  agent_name = plac.Annotation("Name of the agent that needs to be run", "option", "a", str, ["wordFrequencySimilarity", "DLD", "tfidf", "Ngram"], metavar="{wordFrequencySimilarity,DLD,tfidf,Ngram}"),  
+  inputFile = plac.Annotation("Specify the input file path to scan", "positional", None, str, metavar="inputFile"),
+  processedLicense = plac.Annotation("Specify the location of processed license list file", "option", "l", str, metavar="PROCESSEDLICENSELIST"),
+  ngram_json = plac.Annotation("Specify the location of Ngram JSON (for Ngram agent only)", "option", "j"),
+  similarity = plac.Annotation("Specify the similarity algorithm that you want. First 2 are for TFIDF and last 3 are for Ngram", "option", "s", str, ["ScoreSim", "CosineSim", "DiceSim", "BigramCosineSim"], metavar="{ScoreSim,CosineSim,DiceSim,BigramCosineSim}"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")  
+)
+
+def evaluate(inputFile, processedLicense, ngram_json, agent_name="wordFrequencySimilarity", similarity="CosineSim", verbose=False):
   defaultProcessed = resource_filename("atarashi", "data/licenses/processedLicenses.csv")
   defaultJSON = resource_filename("atarashi", "data/Ngram_keywords.json")
-  parser = argparse.ArgumentParser()
-  parser.add_argument("inputFile", help="Specify the input file path to scan")
-  parser.add_argument("-l", "--processedLicenseList", required=False,
-                      help="Specify the location of processed license list file")
-  parser.add_argument("-a", "--agent_name", required=True,
-                      choices=['wordFrequencySimilarity', 'DLD', 'tfidf', 'Ngram'],
-                      help="Name of the agent that needs to be run")
-  parser.add_argument("-s", "--similarity", required=False, default="CosineSim",
-                      choices=["ScoreSim", "CosineSim", "DiceSim", "BigramCosineSim"],
-                      help="Specify the similarity algorithm that you want."
-                           " First 2 are for TFIDF and last 3 are for Ngram")
-  parser.add_argument("-j", "--ngram_json", required=False,
-                      help="Specify the location of Ngram JSON (for Ngram agent only)")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action="count", default=0)
-  parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + __version__)
-  args = parser.parse_args()
-  inputFile = args.inputFile
-  agent_name = args.agent_name
-  similarity = args.similarity
-  verbose = args.verbose
-  processedLicense = args.processedLicenseList
-  ngram_json = args.ngram_json
 
   if processedLicense is None:
     processedLicense = defaultProcessed
   if ngram_json is None:
     ngram_json = defaultJSON
+  if similarity is None:
+    similarity = "CosineSim"
 
-  result = atarashii_runner(inputFile, processedLicense, agent_name, similarity, ngram_json, verbose)
+  '''
+  Calls atarashii_runner for each file in the folder/ repository specified by user
+  Prints the Input file path and the JSON output from atarashii_runner
+  '''
+  result = atarashii_runner(inputFile, agent_name, processedLicense, similarity, ngram_json, verbose)
   if agent_name == "wordFrequencySimilarity":
     result = [{
             "shortname": str(result),
@@ -135,11 +125,17 @@ def main():
             "sim_type": "dld",
             "description": ""
         }]
-  result = list(result)
-  result = {"file": os.path.abspath(inputFile), "results": result}
-  result = json.dumps(result, sort_keys=True, ensure_ascii=False, indent=4)
-  print(result + "\n")
+        
+  if result != -1:
+    result = list(result)
+    result = {"file": os.path.abspath(inputFile), "results": result}
+    result = json.dumps(result, sort_keys=True, ensure_ascii=False, indent=4)
+    print(result + "\n")
+
+
+def main():
+  plac.call(evaluate)
 
 
 if __name__ == '__main__':
-  main()
+  plac.call(evaluate)
\ No newline at end of file
diff --git a/atarashi/build_deps.py b/atarashi/build_deps.py
index 170c165c..848903ac 100755
--- a/atarashi/build_deps.py
+++ b/atarashi/build_deps.py
@@ -22,7 +22,7 @@
 __author__ = "Gaurav Mishra"
 __email__ = "gmishx@gmail.com"
 
-import argparse
+import plac
 import os
 import sys
 sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + '/../')
@@ -40,7 +40,12 @@
 The merged CSV is then processesed which is then used to create the Ngrams.
 """
 
-def download_dependencies(threads = os.cpu_count(), verbose = 0):
+@plac.annotations(
+  threads = plac.Annotation("No of threads to use for download. Default: CPU count", "option", "t", int, metavar="THREADS"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")
+)
+
+def download_dependencies(threads = os.cpu_count(), verbose = False):
   currentDir = os.path.dirname(os.path.abspath(__file__))
   licenseListCsv = currentDir + "/data/licenses/licenseList.csv"
   processedLicenseListCsv = currentDir + "/data/licenses/processedLicenses.csv"
@@ -59,14 +64,4 @@ def download_dependencies(threads = os.cpu_count(), verbose = 0):
   createNgrams(processedLicenseListCsv, ngramJsonLoc, threads, verbose)
 
 if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("-t", "--threads", required = False, default = os.cpu_count(),
-                      type = int,
-                      help = "No of threads to use for download. Default: CPU count")
-  parser.add_argument("-v", "--verbose", help = "increase output verbosity",
-                      action = "count", default = 0)
-  args = parser.parse_args()
-  threads = args.threads
-  verbose = args.verbose
-
-  download_dependencies(threads, verbose)
+  plac.call(download_dependencies)
\ No newline at end of file
diff --git a/atarashi/evaluator/evaluator.py b/atarashi/evaluator/evaluator.py
index 637fe3ad..d5bd97dd 100644
--- a/atarashi/evaluator/evaluator.py
+++ b/atarashi/evaluator/evaluator.py
@@ -23,8 +23,9 @@
 from tqdm import tqdm
 import shutil
 import sys
-import argparse
-from multiprocessing import Pool
+import plac
+from multiprocessing import Pool, freeze_support
+from functools import partial
 
 __author__ = "Ayush Bhardwaj"
 __email__ = "classicayush@gmail.com"
@@ -78,10 +79,8 @@ def getCommand(agent_name, similarity):
       return -1
   return command
 
-filesScanned = 0
-match = 0
 
-def processFile(filepath):
+def processFile(filepath, command):
   '''
   processFile function runs the agent command on the bash/terminal and gets the result for the given file
 
@@ -115,6 +114,8 @@ def processFile(filepath):
         return 0
     except Exception:
       return 0
+  else:
+    return 0
 
 def evaluate(command):
   '''
@@ -133,7 +134,7 @@ def evaluate(command):
       fileList.append(filepath)
 
   with Pool(os.cpu_count()) as p:
-    result = list(tqdm(p.imap_unordered(processFile, fileList), total=len(fileList), unit="files"))
+    result = list(tqdm(p.imap_unordered(partial(processFile, command=command), fileList), total=len(fileList), unit="files"))
 
   # success_count is the count of successfully matched files  
   success_count = sum(result)
@@ -144,27 +145,21 @@ def evaluate(command):
   return (timeElapsed, accuracy)
 
 
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("-a", "--agent_name", required=True,
-                      choices=['wordFrequencySimilarity', 'DLD', 'tfidf', 'Ngram'], help="Name of the agent that you want to evaluate")
-  parser.add_argument("-s", "--similarity", required=False,
-                      default=" ", choices=["ScoreSim", "CosineSim", "DiceSim", " ", "BigramCosineSim"], help="Specify the similarity algorithm that you want to evaluate"
-                      " First 2 are for TFIDF and last 3 are for Ngram")
-  args = parser.parse_args()
-  agent_name = args.agent_name
-  similarity = args.similarity
-
+@plac.annotations(  
+  similarity = plac.Annotation("Specify the similarity algorithm that you want to evaluate. First 2 are for TFIDF and last 3 are for Ngram", "option", "s", str, ["ScoreSim", "CosineSim", "DiceSim", " ", "BigramCosineSim"], metavar="{ScoreSim,CosineSim,DiceSim, ,BigramCosineSim}"),
+  agent_name = plac.Annotation("Name of the agent that you want to evaluate", "option", "a", str, ["wordFrequencySimilarity", "DLD", "tfidf", "Ngram"], metavar="{wordFrequencySimilarity,DLD,tfidf,Ngram}")
+)
 
+def main(similarity, agent_name="wordFrequencySimilarity"):
   command = getCommand(agent_name, similarity)
-  timeElapsed, accuracy = evaluate(command)
-  print('\n' + '      ++++++++++++++++++ Result ++++++++++++++++++')
-  print('      ++++++++++++++++++++++++++++++++++++++++++++')
-  prGreen("     ---> Total time elapsed: " + str(round(timeElapsed, 2)) + " Seconds  <---")
-  prGreen("     ---> Accuracy: " + str(round(accuracy, 2)) + "%                     <---")
-  print('      ++++++++++++++++++++++++++++++++++++++++++++')
-  print('      ++++++++++++++++++++++++++++++++++++++++++++')
-
+  if command != -1:
+    timeElapsed, accuracy = evaluate(command)
+    print('\n' + '      ++++++++++++++++++ Result ++++++++++++++++++')
+    print('      ++++++++++++++++++++++++++++++++++++++++++++')
+    prGreen("     ---> Total time elapsed: " + str(round(timeElapsed, 2)) + " Seconds  <---")
+    prGreen("     ---> Accuracy: " + str(round(accuracy, 2)) + "%                     <---")
+    print('      ++++++++++++++++++++++++++++++++++++++++++++')
+    print('      ++++++++++++++++++++++++++++++++++++++++++++')
 
   zf = zipfile.ZipFile("TestFiles.zip", "w")
   for dirname, subdirs, files in os.walk("TestFiles"):
@@ -173,5 +168,8 @@ def evaluate(command):
         zf.write(os.path.join(dirname, filename))
   zf.close()
 
-  shutil.rmtree('TestFiles')
+  shutil.rmtree('TestFiles')  
 
+if __name__ == "__main__":
+  freeze_support()
+  plac.call(main)
\ No newline at end of file
diff --git a/atarashi/imtihaan.py b/atarashi/imtihaan.py
index 82992a21..3832fdbd 100644
--- a/atarashi/imtihaan.py
+++ b/atarashi/imtihaan.py
@@ -18,7 +18,7 @@
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-import argparse
+import plac
 import os
 import sys
 from sys import exit
@@ -30,28 +30,17 @@
 __author__ = "Aman Jain"
 __email__ = "amanjain5221@gmail.com"
 
-args = None
 
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("processedLicenseList", help="Specify the processed license list file which contains licenses")
-  parser.add_argument("AgentName", choices=['DLD', 'tfidf', 'Ngram'],
-                      help="Name of the agent that needs to be run")
-  parser.add_argument("TestFiles", help="Specify the folder path that needs to be tested")
-  parser.add_argument("-s", "--similarity", required=False, default="CosineSim",
-                      choices=["ScoreSim", "CosineSim", "DiceSim", "BigramCosineSim"],
-                      help="Specify the similarity algorithm that you want."
-                           " First 2 are for TFIDF and last 3 are for Ngram")
-  parser.add_argument("-j", "--ngram_json", required=False,
-                      help="Specify the location of Ngram JSON (for Ngram agent only)")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true")
-  args = parser.parse_args()
-  agent_name = args.AgentName
-  processedLicense = args.processedLicenseList
-  testFilePath = args.TestFiles
-  similarity = args.similarity
-  ngram_json = args.ngram_json
+@plac.annotations(
+  ngram_json = plac.Annotation("Specify the location of Ngram JSON (for Ngram agent only)", "option", "j"),
+  processedLicense = plac.Annotation("Specify the processed license list file which contains licenses", metavar="processedLicenseList"),
+  agent_name = plac.Annotation("Name of the agent that needs to be run", "positional", None, str, ["DLD", "tfidf", "Ngram"], metavar="{DLD,tfidf,Ngram}"),
+  testFilePath = plac.Annotation("Specify the folder path that needs to be tested", metavar="TestFiles"),
+  similarity = plac.Annotation("Specify the similarity algorithm that you want. First 2 are for TFIDF and last 3 are for Ngram", "option", "s", str, ["ScoreSim", "CosineSim", "DiceSim", "BigramCosineSim"], metavar="{ScoreSim,CosineSim,DiceSim,BigramCosineSim}"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")
+)
 
+def main(ngram_json, processedLicense, agent_name, testFilePath, similarity="CosineSim", verbose=False):
   pathname = os.path.dirname(sys.argv[0])
   testFilePath = os.path.abspath(testFilePath)
 
@@ -85,4 +74,7 @@
       print(filepath.split('tests/')[1])
       actual_license = filepath.split('/')[-1].split('.c')[0]
       result = scanner.scan(filepath)
-      print("Actual License: " + actual_license + "\nResult: " + str(result) + "\n")
+      print("Actual License: " + actual_license + "\nResult: " + str(result) + "\n")  
+
+if __name__ == "__main__":
+  plac.call(main)
\ No newline at end of file
diff --git a/atarashi/libs/commentPreprocessor.py b/atarashi/libs/commentPreprocessor.py
index a208ba69..91365db0 100644
--- a/atarashi/libs/commentPreprocessor.py
+++ b/atarashi/libs/commentPreprocessor.py
@@ -19,7 +19,7 @@
 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
 
-import argparse
+import plac
 from nirjas import extract
 import json
 import os
@@ -31,7 +31,6 @@
 __author__ = "Aman Jain"
 __email__ = "amanjain5221@gmail.com"
 
-args = None
 
 def licenseComment(data):
     list = ['source', 'free', 'under','use',  'copyright', 'grant', 'software', 'license','licence', 'agreement', 'distribute', 'redistribution', 'liability', 'rights', 'reserved', 'general', 'public', 'modify', 'modified', 'modification', 'permission','permitted' 'granted', 'distributed', 'notice', 'distribution', 'terms', 'freely', 'licensed', 'merchantibility','redistributed', 'see', 'read', '(c)', 'copying', 'legal', 'licensing', 'spdx']
@@ -139,20 +138,15 @@ def extract(inputFile):
     return outputFile
 
 
-if __name__ == "__main__":
-  print("The file has been run directly")
-  parser = argparse.ArgumentParser()
-  parser.add_argument("-p", "--process", required=True,
-                      choices=['preprocess', 'extract'],
-                      help="Which process you want to run")
-  parser.add_argument("inputFile", help="Specify the input file which needs to be processed")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action="count", default=0)
-  args = parser.parse_args()
-  process = args.process
-  inputFile = args.inputFile
-  verbose = args.verbose
+@plac.annotations(
+  process = plac.Annotation("Which process you want to run", "option", "p", str, ["preprocess", "extract"], metavar="{preprocess,extract}"),
+  inputFile = plac.Annotation("Specify the input file which needs to be processed"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")  
+)
 
+
+def main(process, inputFile, verbose=False):
+  print("The file has been run directly")
   if process == "extract":
     tempLoc = str(CommentPreprocessor.extract(inputFile))
     print("Temporary output file path: ", tempLoc)
@@ -162,3 +156,7 @@ def extract(inputFile):
     with open(inputFile) as file:
       data = file.read().replace('\n', ' ')
       print("Preprocessed data is: ", str(CommentPreprocessor.preprocess(data)))
+
+
+if __name__ == "__main__":
+  plac.call(main)
\ No newline at end of file
diff --git a/atarashi/libs/initialmatch.py b/atarashi/libs/initialmatch.py
index 1c47bb55..a2c0024a 100644
--- a/atarashi/libs/initialmatch.py
+++ b/atarashi/libs/initialmatch.py
@@ -130,4 +130,4 @@ def initial_match(filePath, processedData, licenses):
       })
 
   matches = list(itertools.chain(spdx_identifiers, exact_match_header, exact_match_fulltext, header_sim_match[:5]))
-  return matches
+  return matches
\ No newline at end of file
diff --git a/atarashi/libs/license_clustering.py b/atarashi/libs/license_clustering.py
index f3f603ef..0e41477d 100644
--- a/atarashi/libs/license_clustering.py
+++ b/atarashi/libs/license_clustering.py
@@ -18,7 +18,7 @@
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-import argparse
+import plac
 import time
 
 from atarashi.libs.utils import cosine_similarity
@@ -120,17 +120,16 @@ def cluster_licenses(licenseList, verbose=0):
   return result
 
 
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("processedLicenseList", help="Specify the processed license list file")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action="count", default=0)
-  args = parser.parse_args()
-
-  licenseList = args.processedLicenseList
-  verbose = args.verbose
+@plac.annotations(
+  licenseList = plac.Annotation("Specify the processed license list file", "positional", None, str, metavar="processedLicenseList"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")  
+)
 
+def main(licenseList, verbose=False):
   start = time.time()
   cluster = cluster_licenses(licenseList, verbose)
   print("Time taken is ", str(time.time() - start))
   print(cluster)
+
+if __name__ == "__main__":
+  plac.call(main)
\ No newline at end of file
diff --git a/atarashi/libs/ngram.py b/atarashi/libs/ngram.py
index cf112211..597dec8b 100644
--- a/atarashi/libs/ngram.py
+++ b/atarashi/libs/ngram.py
@@ -18,7 +18,7 @@
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-import argparse
+import plac
 import json
 from multiprocessing import Pool as ThreadPool
 import os
@@ -137,23 +137,15 @@ def createNgrams(licenseList, ngramJsonLoc, threads=os.cpu_count(), verbose=0):
   return ngramJsonLoc, matched_output, no_keyword_matched
 
 
-if __name__ == '__main__':
-  parser = argparse.ArgumentParser()
-  parser.add_argument("processedLicenseList", help="Specify the processed license list file")
-  parser.add_argument("ngramJson", help="Specify the location to store "
-                                        "NGRAM JSON")
-  parser.add_argument("-t", "--threads", required=False, default=os.cpu_count(),
-                      type=int,
-                      help="No of threads to use for download. Default: CPU count")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action="count", default=0)
-  args = parser.parse_args()
-
-  licenseList = args.processedLicenseList
-  threads = args.threads
-  ngramJsonLoc = args.ngramJson
-  verbose = args.verbose
+@plac.annotations(
+  licenseList = plac.Annotation("Specify the processed license list file", "positional", None, str, metavar="processedLicenseList"),
+  ngramJsonLoc = plac.Annotation("Specify the location to store NGRAM JSON", metavar="ngramJson"),
+  threads = plac.Annotation("No of threads to use for download. Default: CPU count", "option", "t", int),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")  
+)
+
 
+def main(licenseList, ngramJsonLoc, threads=os.cpu_count(), verbose=False):
   createNgrams(licenseList, ngramJsonLoc, threads, verbose=verbose)
   if verbose > 0:
     print(matched_output)
@@ -168,3 +160,6 @@ def createNgrams(licenseList, ngramJsonLoc, threads=os.cpu_count(), verbose=0):
 4. store the unique ngrams in a file (maybe csv or any file)
 
 '''
+
+if __name__ == '__main__':
+  plac.call(main)
\ No newline at end of file
diff --git a/atarashi/libs/utils.py b/atarashi/libs/utils.py
index b3b5d1a1..dedb9177 100644
--- a/atarashi/libs/utils.py
+++ b/atarashi/libs/utils.py
@@ -66,4 +66,4 @@ def cosine_similarity(a, b):
   if temp == 0:
     return 0
   else:
-    return dot_product / temp
+    return dot_product / temp
\ No newline at end of file
diff --git a/atarashi/license/licenseDownloader.py b/atarashi/license/licenseDownloader.py
index ad3f0d5a..0d17af9d 100644
--- a/atarashi/license/licenseDownloader.py
+++ b/atarashi/license/licenseDownloader.py
@@ -18,7 +18,7 @@
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-import argparse
+import plac
 from builtins import staticmethod
 import json
 from multiprocessing import Pool as ThreadPool
@@ -168,14 +168,13 @@ def fetch_exceptional_license(license):
     return pd.DataFrame(licenseDict, columns=csvColumns)
 
 
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("-t", "--threads", required=False, default=os.cpu_count(),
-                      type=int,
-                      help="No of threads to use for download. Default: CPU count")
-  parser.add_argument("-f", "--force", action="store_true",
-                      help="Force download regardless of existing list")
-  args = parser.parse_args()
-  threads = args.threads
-  force = args.force
+@plac.annotations(
+  force = plac.Annotation("Force download regardless of existing list", "flag", "f"),
+  threads = plac.Annotation("No of threads to use for download. Default: CPU count", "option", "t", int)
+)
+
+def main(force=False, threads=os.cpu_count()):  # defaults mirror the previous argparse CLI (-t fell back to the CPU count)
   print(LicenseDownloader.download_license(threads, force))
+
+if __name__ == "__main__":
+  plac.call(main)
\ No newline at end of file
diff --git a/atarashi/license/licenseLoader.py b/atarashi/license/licenseLoader.py
index 81fb4825..82424910 100644
--- a/atarashi/license/licenseLoader.py
+++ b/atarashi/license/licenseLoader.py
@@ -36,4 +36,4 @@ def fetch_licenses(licenseList):  # common
     '''
     licenseDataFrame = pd.read_csv(licenseList)
     licenseDataFrame = licenseDataFrame.replace(np.nan, '', regex = True)
-    return licenseDataFrame
+    return licenseDataFrame
\ No newline at end of file
diff --git a/atarashi/license/licensePreprocessor.py b/atarashi/license/licensePreprocessor.py
index c2efe7c9..722d24d6 100644
--- a/atarashi/license/licensePreprocessor.py
+++ b/atarashi/license/licensePreprocessor.py
@@ -22,7 +22,7 @@
 __author__ = "Gaurav Mishra"
 __email__ = "gmishx@gmail.com"
 
-import argparse
+import plac
 import os
 from pathlib import Path
 
@@ -31,8 +31,6 @@
 from atarashi.libs.commentPreprocessor import CommentPreprocessor
 from atarashi.license.licenseLoader import LicenseLoader
 
-args = None
-
 
 class LicensePreprocessor(object):
 
@@ -103,15 +101,19 @@ def create_processed_file(licenseList, processedFile, verbose=0):
     return processedFile
 
 
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("licenseList", help="Specify the license list file which contains licenses")
-  parser.add_argument("processedFile", help="Specify the destination to store processed list")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action="count", default=0)
-  args = parser.parse_args()
+@plac.annotations(
+  licenseList = plac.Annotation("Specify the license list file which contains licenses", "positional"),
+  processedFile = plac.Annotation("Specify the destination to store processed list", "positional"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")
+)
+
+
+def main(licenseList, processedFile, verbose=False):  # CLI entry point invoked through plac.call
-  licenseList = os.path.abspath(args.licenseList)
-  processedFile = os.path.abspath(args.processedFile)
-  verbose = args.verbose
+  licenseList = os.path.abspath(licenseList)
+  processedFile = os.path.abspath(processedFile)
 
   print("Use: " + LicensePreprocessor.create_processed_file(licenseList, processedFile, verbose))
+
+
+if __name__ == "__main__":
+  plac.call(main)
\ No newline at end of file
diff --git a/atarashi/license/license_merger.py b/atarashi/license/license_merger.py
index 182b2c66..8720f8e3 100644
--- a/atarashi/license/license_merger.py
+++ b/atarashi/license/license_merger.py
@@ -22,7 +22,7 @@
 __author__ = "Aman Jain"
 __email__ = "amanjain5221@gmail.com"
 
-import argparse
+import plac
 import os
 from pathlib import Path
 
@@ -95,18 +95,18 @@ def license_merger(licenseList, requiredlicenseList, verbose=0):
   return str(Path(os.path.abspath(requiredlicenseList)))
 
 
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("licenseList", help="Specify the license list file of fossology which contains licenses")
-  parser.add_argument("requiredlicenseList", help="Specify the license list file in which you want to merge licenses")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action="count", default=0)
-  args = parser.parse_args()
+@plac.annotations(
+  licenseList = plac.Annotation("Specify the license list file of fossology which contains licenses", "positional"),
+  requiredlicenseList = plac.Annotation("Specify the license list file in which you want to merge licenses", "positional"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")  
+)
 
-  licenseList = args.licenseList
-  requiredlicenseList = args.requiredlicenseList
-  verbose = args.verbose
 
+def main(licenseList, requiredlicenseList, verbose=False):
   filePath = license_merger(licenseList, requiredlicenseList, verbose)
   if filePath:
     print("Updated", filePath)
+
+
+if __name__ == "__main__":
+  plac.call(main)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 716e93dc..cc4323cb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,5 +10,6 @@ requires = [
   "textdistance>=3.0.3",
   "pyxDamerauLevenshtein>=1.5",
   "nirjas>=0.0.3",
-  "urllib3>=1.24.1"
+  "urllib3>=1.24.1",
+  "plac>=1.2.0"
 ]
diff --git a/requirements.txt b/requirements.txt
index b90ce4a7..b1cea506 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,5 @@ spacy>=2.0.11
 textdistance>=3.0.3
 setuptools>=39.2.0
 nirjas>=0.0.3
-urllib3>=1.24.1
\ No newline at end of file
+urllib3>=1.24.1
+plac>=1.2.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 5177656b..e1fcd86a 100755
--- a/setup.py
+++ b/setup.py
@@ -153,4 +153,4 @@ def run(self):
   },
 )
 
-setup(**metadata)
+setup(**metadata)
\ No newline at end of file