Update notebook 2 for current scikit-learn / NLTK / spaCy APIs and add a
Makefile that bootstraps a virtual environment for the course notebooks.

Review notes (this preamble is ignored by `git apply` / `patch`):
  * The lemmatization cell emits `word.lemma_` (not `word.text`) so it still
    shows lemmas, and imports spacy itself so it does not rely on another cell.
  * The Makefile installs `scikit-learn` (the `sklearn` PyPI name is a
    deprecated stub), declares every target phony, and encodes the
    venv -> lib -> pipeline order as prerequisites so `make -j` and
    single-target invocations work.
  * NOTE(review): notebook context indentation was reconstructed assuming
    nbformat's one-space indent; confirm against the target file. The blob
    hashes on the "index" lines are carried over from the original patch and
    were not recomputed.

diff --git a/2-svd-nmf-topic-modeling.ipynb b/2-svd-nmf-topic-modeling.ipynb
index f9d50bf..2f490b5 100644
--- a/2-svd-nmf-topic-modeling.ipynb
+++ b/2-svd-nmf-topic-modeling.ipynb
@@ -350,7 +350,7 @@
     }
    ],
    "source": [
-    "from sklearn.feature_extraction import stop_words\n",
+    "from sklearn.feature_extraction import _stop_words as stop_words\n",
     "\n",
     "sorted(list(stop_words.ENGLISH_STOP_WORDS))[:20]"
    ]
@@ -420,7 +420,8 @@
    ],
    "source": [
     "import nltk\n",
-    "nltk.download('wordnet')"
+    "nltk.download('wordnet')\n",
+    "nltk.download('omw-1.4')"
    ]
   },
   {
@@ -568,8 +569,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from spacy.lemmatizer import Lemmatizer\n",
-    "lemmatizer = Lemmatizer()"
+    "import spacy\n",
+    "nlp = spacy.load(\"en_core_web_sm\")\n",
+    "doc = nlp(\" \".join(word_list))"
    ]
   },
   {
@@ -589,7 +591,7 @@
     }
    ],
    "source": [
-    "[lemmatizer.lookup(word) for word in word_list]"
+    "[word.lemma_ for word in doc]"
    ]
   },
   {
@@ -901,7 +903,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "vocab = np.array(vectorizer.get_feature_names())"
+    "vocab = np.array(vectorizer.get_feature_names_out())"
    ]
   },
   {
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..d9088f6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,26 @@
+# Makefile
+# description: Install virtual environment and libraries to run course notebooks.
+
+PYTHON_VENV = .venv
+
+init: venv lib pipeline
+
+# Install required libraries into the venv (scikit-learn is sklearn's PyPI name).
+lib: venv
+	@. $(PYTHON_VENV)/bin/activate && pip install \
+		fbpca \
+		matplotlib \
+		nltk \
+		numpy \
+		scikit-learn \
+		spacy
+
+# Download spacy pipeline (requires spacy, which `lib` installs).
+pipeline: lib
+	. $(PYTHON_VENV)/bin/activate && python -m spacy download en_core_web_sm
+
+# Create virtual environment if it does not exist yet.
+venv:
+	test -d $(PYTHON_VENV) || python3 -m venv $(PYTHON_VENV)
+
+.PHONY: init lib pipeline venv