Skip to content

Commit

Permalink
refactor(argparse): Shifted from argparse to plac
Browse files Browse the repository at this point in the history
  • Loading branch information
Aman-Codes committed Dec 30, 2020
1 parent 406955b commit 28e37a3
Show file tree
Hide file tree
Showing 23 changed files with 211 additions and 247 deletions.
3 changes: 1 addition & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,4 @@ include atarashi/data/Ngram_keywords.json

prune .git
prune venv
prune test*

prune test*
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,4 +195,4 @@ This will generate file in `docs/_build/html`. Go to: index.html

You can change the theme of the documentation by changing `html_theme` in the `conf.py` file in the `docs/` folder.
You can choose from {'alabaster', 'classic', 'sphinxdoc', 'scrolls', 'agogo', 'traditional', 'nature', 'haiku', 'pyramid', 'bizstyle'}
[Reference](https://www.sphinx-doc.org/en/master/usage/theming.html)
[Reference](https://www.sphinx-doc.org/en/master/usage/theming.html)
2 changes: 1 addition & 1 deletion atarashi/agents/atarashiAgent.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,4 @@ def exactMatcher(licenseText, licenses):
output.append(licenses.iloc[idx]['shortname'])
if not output:
return -1
return output
return output
32 changes: 14 additions & 18 deletions atarashi/agents/cosineSimNgram.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
import argparse
import plac
from enum import Enum
import itertools
import json
Expand Down Expand Up @@ -184,24 +184,16 @@ def setSimAlgo(self, newAlgo):
self.simType = newAlgo


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("processedLicenseList", help="Specify the processed license list file")
parser.add_argument("ngramJson", help="Specify the location of NGRAM JSON")
parser.add_argument("inputFile", help="Specify the input file which needs to be scanned")
parser.add_argument("-s", "--similarity", required=False, default="BigramCosineSim",
choices=["CosineSim", "DiceSim", "BigramCosineSim"],
help="Specify the similarity algorithm that you want")
parser.add_argument("-v", "--verbose", help="increase output verbosity",
action='count', default=0)
args = parser.parse_args()

licenseList = args.processedLicenseList
ngramJsonLoc = args.ngramJson
inputFile = args.inputFile
simType = args.similarity
verbose = args.verbose
@plac.annotations(
licenseList = plac.Annotation("Specify the processed license list file", "positional", None, str, metavar="processedLicenseList"),
ngramJsonLoc = plac.Annotation("Specify the location of NGRAM JSON", metavar="ngramJson"),
inputFile = plac.Annotation("Specify the input file which needs to be scanned"),
similarity = plac.Annotation("Specify the similarity algorithm that you want", "option", "s", str, ["CosineSim", "DiceSim", "BigramCosineSim"], metavar="{CosineSim,DiceSim,BigramCosineSim}"),
verbose = plac.Annotation("increase output verbosity", "flag", "v")
)

def main(licenseList, ngramJsonLoc, inputFile, similarity="BigramCosineSim", verbose=False):
simType = similarity
scanner = NgramAgent(licenseList, ngramJson=ngramJsonLoc, verbose=verbose)
if simType == "CosineSim":
scanner.setSimAlgo(NgramAgent.NgramAlgo.cosineSim)
Expand All @@ -215,3 +207,7 @@ def setSimAlgo(self, newAlgo):
print("N-Gram identifier and " + str(simType) + " is " + str(result))
else:
print("Result is nothing")


if __name__ == "__main__":
plac.call(main)
28 changes: 14 additions & 14 deletions atarashi/agents/dameruLevenDist.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""

import argparse
import plac
import sys

from pyxdameraulevenshtein import damerau_levenshtein_distance
Expand Down Expand Up @@ -62,18 +62,18 @@ def scan(self, filePath):
return temp[0]


if __name__ == "__main__":
print("The file has been run directly")
parser = argparse.ArgumentParser()
parser.add_argument("inputFile", help="Specify the input file which needs to be scanned")
parser.add_argument("processedLicenseList",
help="Specify the processed license list file which contains licenses")
parser.add_argument("-v", "--verbose", help="increase output verbosity",
action="count", default=0)
args = parser.parse_args()
filename = args.inputFile
licenseList = args.processedLicenseList
verbose = args.verbose
@plac.annotations(
filename = plac.Annotation("Specify the input file which needs to be scanned", metavar="inputFile"),
licenseList = plac.Annotation("Specify the processed license list file which contains licenses", "positional", None, str, metavar="processedLicenseList"),
verbose = plac.Annotation("increase output verbosity", "flag", "v")
)


def main(filename, licenseList, verbose=False):
print("The file has been run directly")
scanner = DameruLevenDist(licenseList, verbose=verbose)
print("License Detected using Dameru Leven Distance: " + str(scanner.scan(filename)))
print("License Detected using Dameru Leven Distance: " + str(scanner.scan(filename)))


if __name__ == "__main__":
plac.call(main)
30 changes: 12 additions & 18 deletions atarashi/agents/tfidf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
__author__ = "Aman Jain"
__email__ = "[email protected]"

import argparse
import plac
from enum import Enum
import itertools
import time
Expand Down Expand Up @@ -151,28 +151,22 @@ def setSimAlgo(self, newAlgo):
self.algo = newAlgo


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-s", "--tfidf_similarity", required=False,
default="ScoreSim",
choices=["CosineSim", "ScoreSim"],
help="Specify the similarity algorithm that you want")
parser.add_argument("inputFile", help="Specify the input file which needs to be scanned")
parser.add_argument("processedLicenseList",
help="Specify the processed license list file which contains licenses")
parser.add_argument("-v", "--verbose", help="increase output verbosity",
action="count", default=0)
args = parser.parse_args()

tfidf_similarity = args.tfidf_similarity
filename = args.inputFile
licenseList = args.processedLicenseList
verbose = args.verbose
@plac.annotations(
filename = plac.Annotation("Specify the input file which needs to be scanned", metavar="inputFile"),
licenseList = plac.Annotation("Specify the processed license list file which contains licenses", "positional", None, str, metavar="processedLicenseList"),
tfidf_similarity = plac.Annotation("Specify the similarity algorithm that you want", "option", "s", str, ["CosineSim", "ScoreSim"], metavar="{CosineSim,ScoreSim}"),
verbose = plac.Annotation("increase output verbosity", "flag", "v")
)


def main(filename, licenseList, tfidf_similarity="ScoreSim", verbose=False):
scanner = TFIDF(licenseList, verbose=verbose)
if tfidf_similarity == "CosineSim":
scanner.setSimAlgo(TFIDF.TfidfAlgo.cosineSim)
print("License Detected using TF-IDF algorithm + cosine similarity " + str(scanner.scan(filename)))
else:
scanner.setSimAlgo(TFIDF.TfidfAlgo.scoreSim)
print("License Detected using TF-IDF algorithm + sum score " + str(scanner.scan(filename)))

if __name__ == "__main__":
plac.call(main)
27 changes: 13 additions & 14 deletions atarashi/agents/wordFrequencySimilarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
__author__ = "Aman Jain"
__email__ = "[email protected]"

import argparse
import plac
import re

from atarashi.agents.atarashiAgent import AtarashiAgent, exactMatcher
Expand Down Expand Up @@ -79,19 +79,18 @@ def scan(self, filePath):
return temp


if __name__ == "__main__":
print("The file has been called from main")
parser = argparse.ArgumentParser()
parser.add_argument("inputFile", help = "Specify the input file which needs to be scanned")
parser.add_argument("processedLicenseList",
help = "Specify the processed license list file which contains licenses")
parser.add_argument("-v", "--verbose", help = "increase output verbosity",
action = "count", default = 0)

args = parser.parse_args()
filename = args.inputFile
licenseList = args.processedLicenseList
verbose = args.verbose
@plac.annotations(
filename = plac.Annotation("Specify the input file which needs to be scanned", metavar="inputFile"),
licenseList = plac.Annotation("Specify the processed license list file which contains licenses", "positional", None, str, metavar="processedLicenseList"),
verbose = plac.Annotation("increase output verbosity", "flag", "v")
)


def main(filename, licenseList, verbose=False):
print("The file has been called from main")
scanner = WordFrequencySimilarity(licenseList, verbose = verbose)
print("The result from Histogram similarity algo is ", scanner.scan(filename))


if __name__ == "__main__":
plac.call(main)
70 changes: 33 additions & 37 deletions atarashi/atarashii.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
import argparse
import plac
import os
import json
from pkg_resources import resource_filename
Expand All @@ -33,7 +33,9 @@
__version__ = "0.0.11"


def atarashii_runner(inputFile, processedLicense, agent_name, similarity="CosineSim", ngramJsonLoc=None, verbose=None):


def atarashii_runner(inputFile, agent_name, processedLicense, similarity="CosineSim", ngram_json=None, verbose=None):
'''
:param inputFile: Input File for scanning of license
:param processedLicense: Processed License List (CSV) path (Default path already provided)
Expand Down Expand Up @@ -68,7 +70,7 @@ def atarashii_runner(inputFile, processedLicense, agent_name, similarity="Cosine
print("Please choose similarity from {CosineSim,ScoreSim}")
return -1
elif agent_name == "Ngram":
scanner = NgramAgent(processedLicense, ngramJson=ngramJsonLoc)
scanner = NgramAgent(processedLicense, ngramJson=ngram_json)
if similarity == "CosineSim":
scanner.setSimAlgo(NgramAgent.NgramAlgo.cosineSim)
elif similarity == "DiceSim":
Expand All @@ -84,43 +86,31 @@ def atarashii_runner(inputFile, processedLicense, agent_name, similarity="Cosine
return result


def main():
'''
Calls atarashii_runner for each file in the folder/ repository specified by user
Prints the Input file path and the JSON output from atarashii_runner
'''
@plac.annotations(
agent_name = plac.Annotation("Name of the agent that needs to be run", "option", "a", str, ["wordFrequencySimilarity", "DLD", "tfidf", "Ngram"], metavar="{wordFrequencySimilarity,DLD,tfidf,Ngram}"),
inputFile = plac.Annotation("Specify the input file path to scan", "positional", None, str, metavar="inputFile"),
processedLicense = plac.Annotation("Specify the location of processed license list file", "option", "l", str, metavar="PROCESSEDLICENSELIST"),
ngram_json = plac.Annotation("Specify the location of Ngram JSON (for Ngram agent only)", "option", "j"),
similarity = plac.Annotation("Specify the similarity algorithm that you want. First 2 are for TFIDF and last 3 are for Ngram", "option", "s", str, ["ScoreSim", "CosineSim", "DiceSim", "BigramCosineSim"], metavar="{ScoreSim,CosineSim,DiceSim,BigramCosineSim}"),
verbose = plac.Annotation("increase output verbosity", "flag", "v")
)

def evaluate(inputFile, processedLicense, ngram_json, agent_name="wordFrequencySimilarity", similarity="CosineSim", verbose=False):
defaultProcessed = resource_filename("atarashi", "data/licenses/processedLicenses.csv")
defaultJSON = resource_filename("atarashi", "data/Ngram_keywords.json")
parser = argparse.ArgumentParser()
parser.add_argument("inputFile", help="Specify the input file path to scan")
parser.add_argument("-l", "--processedLicenseList", required=False,
help="Specify the location of processed license list file")
parser.add_argument("-a", "--agent_name", required=True,
choices=['wordFrequencySimilarity', 'DLD', 'tfidf', 'Ngram'],
help="Name of the agent that needs to be run")
parser.add_argument("-s", "--similarity", required=False, default="CosineSim",
choices=["ScoreSim", "CosineSim", "DiceSim", "BigramCosineSim"],
help="Specify the similarity algorithm that you want."
" First 2 are for TFIDF and last 3 are for Ngram")
parser.add_argument("-j", "--ngram_json", required=False,
help="Specify the location of Ngram JSON (for Ngram agent only)")
parser.add_argument("-v", "--verbose", help="increase output verbosity",
action="count", default=0)
parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + __version__)
args = parser.parse_args()
inputFile = args.inputFile
agent_name = args.agent_name
similarity = args.similarity
verbose = args.verbose
processedLicense = args.processedLicenseList
ngram_json = args.ngram_json

if processedLicense is None:
processedLicense = defaultProcessed
if ngram_json is None:
ngram_json = defaultJSON
if similarity is None:
similarity = "CosineSim"

result = atarashii_runner(inputFile, processedLicense, agent_name, similarity, ngram_json, verbose)
'''
Calls atarashii_runner for each file in the folder/ repository specified by user
Prints the Input file path and the JSON output from atarashii_runner
'''
result = atarashii_runner(inputFile, agent_name, processedLicense, similarity, ngram_json, verbose)
if agent_name == "wordFrequencySimilarity":
result = [{
"shortname": str(result),
Expand All @@ -135,11 +125,17 @@ def main():
"sim_type": "dld",
"description": ""
}]
result = list(result)
result = {"file": os.path.abspath(inputFile), "results": result}
result = json.dumps(result, sort_keys=True, ensure_ascii=False, indent=4)
print(result + "\n")

if result != -1:
result = list(result)
result = {"file": os.path.abspath(inputFile), "results": result}
result = json.dumps(result, sort_keys=True, ensure_ascii=False, indent=4)
print(result + "\n")


def main():
plac.call(evaluate)


if __name__ == '__main__':
main()
plac.call(evaluate)
21 changes: 8 additions & 13 deletions atarashi/build_deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
__author__ = "Gaurav Mishra"
__email__ = "[email protected]"

import argparse
import plac
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + '/../')
Expand All @@ -40,7 +40,12 @@
The merged CSV is then processed, and the processed list is used to create the Ngrams.
"""

def download_dependencies(threads = os.cpu_count(), verbose = 0):
@plac.annotations(
threads = plac.Annotation("No of threads to use for download. Default: CPU count", "option", "t", int, metavar="THREADS"),
verbose = plac.Annotation("increase output verbosity", "flag", "v")
)

def download_dependencies(threads = os.cpu_count(), verbose = False):
currentDir = os.path.dirname(os.path.abspath(__file__))
licenseListCsv = currentDir + "/data/licenses/licenseList.csv"
processedLicenseListCsv = currentDir + "/data/licenses/processedLicenses.csv"
Expand All @@ -59,14 +64,4 @@ def download_dependencies(threads = os.cpu_count(), verbose = 0):
createNgrams(processedLicenseListCsv, ngramJsonLoc, threads, verbose)

if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-t", "--threads", required = False, default = os.cpu_count(),
type = int,
help = "No of threads to use for download. Default: CPU count")
parser.add_argument("-v", "--verbose", help = "increase output verbosity",
action = "count", default = 0)
args = parser.parse_args()
threads = args.threads
verbose = args.verbose

download_dependencies(threads, verbose)
plac.call(download_dependencies)
Loading

0 comments on commit 28e37a3

Please sign in to comment.