Skip to content

Commit

Permalink
Create text_processing.py
Browse files Browse the repository at this point in the history
  • Loading branch information
KOSASIH authored May 11, 2024
1 parent fa679cd commit 440843f
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions natural_language_processing/text_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import functools

import nltk

@functools.lru_cache(maxsize=None)
def _english_stop_words():
    """Return NLTK's English stop-word list as a frozenset, loaded once."""
    # Reading the corpus hits the filesystem; cache it so repeated calls to
    # preprocess_text() do not rebuild the same set every time.
    return frozenset(nltk.corpus.stopwords.words('english'))


def preprocess_text(text, config):
    """Tokenize *text* and drop English stop words.

    Args:
        text: The raw string to process. ``None`` or an empty string yields
            an empty list without invoking NLTK.
        config: Preprocessing configuration. Accepted for interface
            compatibility but not consulted by the current implementation.
            NOTE(review): the expected schema is not visible here; confirm
            with callers before wiring options (e.g. stemming) to it.

    Returns:
        A list of tokens, in their original order and casing, excluding any
        token whose lowercased form is in NLTK's English stop-word list.

    Raises:
        LookupError: Propagated from NLTK when the tokenizer models or the
            ``stopwords`` corpus have not been downloaded.
    """
    # Nothing to tokenize; also avoids requiring NLTK data for empty input.
    if not text:
        return []

    tokens = nltk.word_tokenize(text)

    stop_words = _english_stop_words()
    # Compare case-insensitively but keep each token's original casing.
    return [token for token in tokens if token.lower() not in stop_words]

0 comments on commit 440843f

Please sign in to comment.