Skip to content

Commit

Permalink
Merge pull request #92 from ranahaani/feat/remove-mongodb-and-update-…
Browse files Browse the repository at this point in the history
…requests

remove mongodb and fix requests
  • Loading branch information
ranahaani authored May 11, 2024
2 parents f0947c7 + b73510f commit b3d7b19
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 82 deletions.
29 changes: 1 addition & 28 deletions gnews/gnews.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,14 @@
import logging
import os
import sys
import urllib.request
import datetime
import inspect
import warnings

import feedparser
from bs4 import BeautifulSoup as Soup
from dotenv import load_dotenv

try:
import newspaper # Optional - required by GNews.get_full_article()
except ImportError:
pass

from gnews.utils.constants import AVAILABLE_COUNTRIES, AVAILABLE_LANGUAGES, TOPICS, BASE_URL, USER_AGENT
from gnews.utils.utils import connect_database, post_database, process_url
from gnews.utils.utils import process_url

logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO,
datefmt='%m/%d/%Y %I:%M:%S %p')
Expand Down Expand Up @@ -312,22 +304,3 @@ def _get_news(self, query):
except Exception as err:
logger.error(err.args[0])
return []

def store_in_mongodb(self, news):
    """
    Store one news item in the MongoDB collection configured via the environment.

    - MongoDB cluster needs to be created first - https://www.mongodb.com/cloud/atlas/register
    - Connect to the MongoDB cluster
    - Create a new collection
    - Insert the news into the collection

    The connection settings are read from the environment variables
    ``DB_USER``, ``DB_PW``, ``DB_NAME`` and ``COLLECTION_NAME`` after
    ``load_dotenv()`` has been called.

    If a setting is missing or the connection cannot be established, the
    problem is logged and nothing is stored.

    :param news: the news object that we created in the previous function
    """
    load_dotenv()

    settings = {
        name: os.getenv(name)
        for name in ("DB_USER", "DB_PW", "DB_NAME", "COLLECTION_NAME")
    }
    missing = [name for name, value in settings.items() if value is None]
    if missing:
        logger.error("Missing MongoDB settings: %s", ", ".join(missing))
        return

    collection = connect_database(
        settings["DB_USER"],
        settings["DB_PW"],
        settings["DB_NAME"],
        settings["COLLECTION_NAME"],
    )
    if collection is None:
        # connect_database returns None when the connection attempt fails;
        # handing that to post_database would raise AttributeError.
        logger.error("MongoDB connection unavailable; news item was not stored.")
        return

    post_database(collection, news)
51 changes: 0 additions & 51 deletions gnews/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@
import logging
import re

import pymongo
import requests
from gnews.utils.constants import AVAILABLE_COUNTRIES, AVAILABLE_LANGUAGES, GOOGLE_NEWS_REGEX
from pymongo import MongoClient


def lang_mapping(lang):
Expand All @@ -17,55 +15,6 @@ def country_mapping(country):
return AVAILABLE_COUNTRIES.get(country)


def connect_database(db_user, db_pw, db_name, collection_name):
    """Mongo DB Establish Cluster Connection

    Builds a ``mongodb+srv://`` URI for the ``gnews.stjap.mongodb.net``
    cluster and returns the requested collection handle.

    :param db_user: MongoDB user name (percent-escaped before use in the URI)
    :param db_pw: MongoDB password (percent-escaped before use in the URI)
    :param db_name: name of the database to open
    :param collection_name: name of the collection inside ``db_name``
    :return: the collection object, or ``None`` if anything went wrong while
        building the URI or creating the client (the error is logged)
    """
    # Imported locally so this function does not depend on a file-level import.
    from urllib.parse import quote_plus

    # .env file Structure:

    # DB_USER="..."
    # DB_PW="..."
    # DB_NAME="..."
    # COLLECTION_NAME="..."

    # name of the mongodb cluster as well as the database name should be "gnews"

    try:
        # Credentials may contain reserved URI characters (@ : / %), which
        # must be percent-escaped or the URI is parsed incorrectly.
        uri = (
            "mongodb+srv://"
            + quote_plus(db_user)
            + ":"
            + quote_plus(db_pw)
            + "@gnews.stjap.mongodb.net/"
            + db_name
            + "?retryWrites=true&w=majority"
        )
        cluster = MongoClient(uri)

        db = cluster[db_name]
        return db[collection_name]

    except Exception as e:
        # Best-effort boundary: report the failure and signal it with None.
        logging.error("Connection Error. %s", e)
        return None


def post_database(collection, news):
    """post unique news articles to mongodb database

    The document ``_id`` is the SHA-256 hex digest of the JSON-serialised
    ``news`` dict, so posting the same article again targets the same
    document instead of creating a new one.

    :param collection: object exposing ``update_one(filter, update, upsert=...)``
    :param news: dict with the keys ``title``, ``description``,
        ``published date``, ``url`` and ``publisher``
    :raises KeyError: if ``news`` lacks one of the keys listed above
    """
    doc_id = hashlib.sha256(json.dumps(news).encode('utf-8')).hexdigest()
    fields = {
        "title": news['title'],
        "description": news['description'],
        "published_date": news['published date'],
        "url": news['url'],
        "publisher": news['publisher']
    }

    try:
        # Match on _id only. Matching on every field would miss a stored
        # document that differs in any field, and the upsert would then try
        # to insert a second document with the same _id.
        collection.update_one({"_id": doc_id}, {'$set': fields}, upsert=True)
    except pymongo.errors.DuplicateKeyError:
        # Still possible when two writers upsert the same _id concurrently.
        logging.error("Posting to database failed.")


def process_url(item, exclude_websites):
source = item.get('source').get('href')
if not all([not re.match(website, source) for website in
Expand Down
4 changes: 1 addition & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
feedparser~=6.0.2
beautifulsoup4>=4.9.3,<5
pymongo~=3.12.0
dnspython~=1.16.0
python-dotenv>=0.19.0
requests>=2.26.0,<3
requests

0 comments on commit b3d7b19

Please sign in to comment.