Skip to content

Commit

Permalink
update some files
Browse files Browse the repository at this point in the history
  • Loading branch information
Toyofumi Fujiwara committed Apr 23, 2024
1 parent 3ee58d1 commit 22bbd1f
Show file tree
Hide file tree
Showing 6 changed files with 156,168 additions and 79,330 deletions.
114 changes: 91 additions & 23 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion bin/get_summary_from_mhlw.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,4 @@ sed -e 's/https\:\/\/www\.mhlw\.go\.jp\/file\/06\-Seisakujouhou\-10900000\-Kenko
## python-docx 0.8.10
## setuptools 46.0.0
## wheel 0.34.2
cat ${outdir}/1_333.num.docx.tsv |while read line; do export IFS=$'\t'; set -- ${line}; echo -ne "$1\t$2\t"; python ./retrieve_txt_from_mhlw_docx.py ${outdir_docx}/$2; echo ""; done > ${outdir}/1_333.num.summary.tsv
cat ${outdir}/1_333.num.docx.tsv |while read line; do export IFS=$'\t'; set -- ${line}; echo -ne "$1\t$2\t"; python ./bin/retrieve_txt_from_mhlw_docx.py ${outdir_docx}/$2; echo ""; done > ${outdir}/1_333.num.summary.tsv
7 changes: 7 additions & 0 deletions converter/utils/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from cnorm.chain import Chain
from cnorm.rule import Rule, Greek2Alpha, Lower

from nltk.corpus import stopwords


class RemovePatterns(Rule):
def __init__(self, patterns: List[Union[str, re.Pattern]]):
Expand Down Expand Up @@ -109,6 +111,11 @@ def apply(self, text: str):
re.compile('^by$'),
re.compile('^and$'),
]
#stop_words = []
#stop_words_original = ['types', 'type', 'diseases', 'disease', 'syndromes', 'syndrome', 'disorders', 'disorder']
#stop_words_original_nltk = stop_words_original + stopwords.words('english')
#for word in stop_words_original_nltk:
# stop_words.append(re.compile(r'^{0}$'.format(word)))
rule_remove_stop_words = RemovePatterns(stop_words)

# Single-element pattern list matching runs of non-word characters.
# Written as a raw string so that ``\W`` is passed to the regex engine
# verbatim; in a plain string literal ``\W`` is an invalid escape sequence
# that newer Python versions warn about.
re_alunum = [re.compile(r'\W+')]
Expand Down
Loading

0 comments on commit 22bbd1f

Please sign in to comment.