From 939112e65e919bdf28a9c0408a77913abfdff88e Mon Sep 17 00:00:00 2001
From: Boushra Bettir <116927138+boushrabettir@users.noreply.github.com>
Date: Tue, 19 Sep 2023 14:49:47 -0700
Subject: [PATCH 1/5] Refactor code

- Placed extra files in the `utility` folder.
- Created `db.py` to store the basic database queries/functionality
- Created `palm.py` to hold all functions for using the PaLM API
- Created `scrap.py` to move all scraping functions into their own file
---
 db.py                                    |  30 ----
 main.py                                  | 211 +++----------------
 utility/db.py                            |  66 +++++++
 opportunity.py => utility/opportunity.py |   0
 utility/palm.py                          | 111 ++++++++++++
 utility/scrap.py                         | 141 +++++++++++++++
 utility.py => utility/utils.py           | 126 +++----------
 7 files changed, 360 insertions(+), 325 deletions(-)
 delete mode 100644 db.py
 create mode 100644 utility/db.py
 rename opportunity.py => utility/opportunity.py (100%)
 create mode 100644 utility/palm.py
 create mode 100644 utility/scrap.py
 rename utility.py => utility/utils.py (60%)

diff --git a/db.py b/db.py
deleted file mode 100644
index b881aac..0000000
--- a/db.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import psycopg2
-import os
-
-
-def instantiate_db_connection():
-    """Returns the connection from the DB"""
-
-    db_uri = os.getenv("DB_URI")
-    return psycopg2.connect(db_uri)
-
-
-def add_column(column_name: str, data_type: str) -> None:
-    """Adds a column for adjustment to the table after the table has been created"""
-
-    with instantiate_db_connection() as connection:
-        cursor = connection.cursor()
-
-        cursor.execute(f"ALTER TABLE jobs_table ADD COLUMN {column_name} {data_type}")
-
-        connection.commit()
-
-
-def delete_alL_opportunity_type(opp_type: str) -> None:
-    """Deletes all opportunities of a specific type for testing purposes only"""
-
-    with instantiate_db_connection() as connection:
-        cursor = connection.cursor()
-
-        cursor.execute("DELETE FROM jobs_table WHERE type = %s", (opp_type,))
-        connection.commit()
diff --git a/main.py b/main.py
index 89785e6..39de0f2 100644
--- a/main.py
+++ b/main.py
@@ -2,194 +2,22 @@
 import os
 import json
 import asyncio
-from typing import List
-import re
 from datetime import date
-import utility as utils
-import db
-import opportunity as opps
-from opportunity import Opportunity, OpportunityType
+from utility import utils
+import utility.db as db
+import utility.opportunity as opps
 from dotenv import load_dotenv
+from utility.scrap import (
+    request_github_internship24_data,
+    request_linkedin_data,
+    request_linkedin_internship24_data,
+)
+from utility.palm import gpt_job_analyze
 
-load_dotenv()  # To obtain keys from the .env file
-
-# ----------------- POSTGRES -----------------
-
-TABLE_NAME = os.getenv("DB_TABLE")
-MAX_LIST_LENGTH = 13
-
-
-def create():
-    """Creates the DB. Only needs to be called once."""
-
-    with db.instantiate_db_connection() as connection:
-        cursor = connection.cursor()
-
-        cursor.execute(
-            f"""CREATE TABLE IF NOT EXISTS {TABLE_NAME}(company TEXT, title TEXT, location TEXT, link TEXT, processed INTEGER DEFAULT 0)"""
-        )
-
-        connection.commit()
-
-
-# ----------------- INTERNSHIP DATA -----------------
-
-
-def request_github_internship24_data() -> List[Opportunity]:
-    """Scrapes Internship Data '24 from Github Repo"""
-
-    url = os.getenv("GH_INTERN24_URL")
-    parse_content = utils.content_parser(url)
-    github_list = []
-    td_elems = parse_content.find_all("tr")
-
-    for cell in td_elems[1:]:
-        if len(github_list) <= MAX_LIST_LENGTH:
-            elements = cell.find_all("td")
-
-            company = elements[0].text
-            title = elements[1].text
-            location = elements[2].text
-            link = elements[3]
-            if "🔒" not in link.text:
-                opportunity = Opportunity(
-                    company,
-                    title,
-                    location,
-                    link.find("a")["href"],
-                    0,
-                    OpportunityType.INTERNSHIP.value,
-                )
-                github_list.append(opportunity)
-
-    return github_list
-
-
-def request_linkedin_internship24_data() -> List[Opportunity]:
-    """Web scrapes Summer '24 Internship Opportunities using LinkedIn"""
-
-    url = os.getenv("LINKEDIN_INTERN_URL")
-
-    parse_content = utils.content_parser(url)
-
-    linkedin_internship_opps = utils.blueprint_opportunity_formatter(
-        parse_content,
-        "base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card",
-        "hidden-nested-link",
-        "base-search-card__title",
-        "job-search-card__location",
-        "base-card__full-link",
-        True,
-        MAX_LIST_LENGTH,
-        OpportunityType.INTERNSHIP.value,
-    )
-
-    return linkedin_internship_opps
-
-
-# ----------------- JOB DATA -----------------
-
-
-def request_rapidapi_indeed_data() -> List[Opportunity]:
-    """
-    This API call retrieves a formatted response object
-    and returns a List[Opportunity] as the result
-    """
-
-    url = os.getenv("RAPID_API_URL")
-    rapid_api_key = os.getenv("RAPID_API_KEY")
-
-    headers = {
-        "X-RapidAPI-Key": rapid_api_key,
-        "X-RapidAPI-Host": "indeed12.p.rapidapi.com",
-    }
-
-    rapid_jobs = []
-    response = requests.get(url, headers=headers).json()
-
-    days_needed_command_value = utils.extract_command_value().days_needed[
-        0
-    ]  # Extracts command-line value
-
-    for elem in response["hits"]:
-        time = elem["formatted_relative_time"]
-
-        numeric = re.search(r"\d+", time)
-        formatted_time_integer = int(numeric.group()) if numeric else 0
-
-        if (
-            len(rapid_jobs) < MAX_LIST_LENGTH
-            and int(days_needed_command_value) >= formatted_time_integer
-        ):
-            company = elem["company_name"]
-            title = elem["title"]
-            location = elem["location"]
-            link = f'https://www.indeed.com/viewjob?jk={elem["id"]}&locality=us'
-            processed = 0
-
-            opportunity = Opportunity(
-                company,
-                title,
-                location,
-                link,
-                processed,
-                OpportunityType.FULL_TIME.value,
-            )
-
-            rapid_jobs.append(opportunity)
-
-    return rapid_jobs
-
-
-def request_linkedin_data() -> List[Opportunity]:
-    """Returns a List[Opportunity] which contains web scraped job content"""
-
-    url = os.getenv("LINKEDIN_URL")
-    parse_content = utils.content_parser(url)
-
-    linked_in_jobs = utils.blueprint_opportunity_formatter(
-        parse_content,
-        "base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card",
-        "hidden-nested-link",
-        "base-search-card__title",
-        "job-search-card__location",
-        "base-card__full-link",
-        True,
-        MAX_LIST_LENGTH,
-        OpportunityType.FULL_TIME.value,
-    )
-
-    return linked_in_jobs
-
-
-# ----------------- RESET FUNCTION (DEBUGGING PURPOSES) -----------------
-
-
-def reset_processed_status(TABLE_NAME):
-    """Jobs status will be set to _processed = 0 for testing a debugging purposes"""
-
-    with db.instantiate_db_connection() as connection:
-        cursor = connection.cursor()
-
-        cursor.execute(
-            f"SELECT company, title, location FROM {TABLE_NAME} WHERE processed = 1 LIMIT 5"
-        )
-
-        rows = cursor.fetchall()
-
-        for row in rows:
-            company, title, location = row[:3]
-
-            cursor.execute(
-                f"UPDATE {TABLE_NAME} SET processed = 0 WHERE company = %s AND title = %s AND location = %s",
-                (company, title, location),
-            )
-
-        connection.commit()
-
-
-# ----------------- DISCORD BOT -----------------
+# Load and determine if all env variables are set
+load_dotenv()
+utils.verify_set_env_variables()
 
 
 async def execute_opportunities_webhook(webhook_url, job_message, internship_message):
@@ -247,7 +75,10 @@ async def main():
     # Creates table in database
     with_create_table_command = utils.extract_command_value().create
     if with_create_table_command:
-        create()
+        TABLE_NAME = os.getenv("DB_TABLE")
+
+        db.create(TABLE_NAME)
+        print(f"Successfully created {TABLE_NAME}!")
 
         exit()  # Exit the main function to avoid calling other functions
 
@@ -265,7 +96,7 @@
     # Consolidates all job-related opportunities into a comprehensive List[Opportunity], eliminating repetitive calls to the LLM SERVER.
     job_opps = utils.merge_all_opportunity_data(request_linkedin_data())
 
-    filtered_job_opps = utils.gpt_job_analyze(
+    filtered_job_opps = gpt_job_analyze(
         job_opps,
         prompt_object["full_time"],
     )
@@ -277,7 +108,7 @@
         request_github_internship24_data(),
     )
 
-    filtered_internship_opps = utils.gpt_job_analyze(
+    filtered_internship_opps = gpt_job_analyze(
        internship_opps,
        prompt_object["internship"],
    )
@@ -289,7 +120,7 @@
     # To do so, please comment the function calls above this comment.
     # After, please uncomment the following line of code:
 
-    # reset_processed_status()
+    # db.reset_processed_status()
 
     internship_data_results = opps.list_opportunities(True, "internship", filtered=True)
     job_data_results = opps.list_opportunities(True, "full_time", filtered=True)
@@ -311,5 +142,5 @@
     opps.update_opportunities_status(internship_data_results)
 
 
-if __name__ == "__main__":
-    asyncio.run(main())
+# if __name__ == "__main__":
+#     asyncio.run(main())
diff --git a/utility/db.py b/utility/db.py
new file mode 100644
index 0000000..7371f07
--- /dev/null
+++ b/utility/db.py
@@ -0,0 +1,66 @@
+import psycopg2
+import os
+
+
+def instantiate_db_connection():
+    """Returns the connection from the DB"""
+
+    db_uri = os.getenv("DB_URI")
+    return psycopg2.connect(db_uri)
+
+
+def create(TABLE_NAME: str):
+    """Creates the DB. Only needs to be called once."""
+
+    with instantiate_db_connection() as connection:
+        cursor = connection.cursor()
+
+        cursor.execute(
+            f"""CREATE TABLE IF NOT EXISTS {TABLE_NAME}(company TEXT, title TEXT, location TEXT, link TEXT, processed INTEGER DEFAULT 0)"""
+        )
+
+        connection.commit()
+
+
+def add_column(column_name: str, data_type: str) -> None:
+    """Adds a column for adjustment to the table after the table has been created"""
+
+    with instantiate_db_connection() as connection:
+        cursor = connection.cursor()
+
+        cursor.execute(f"ALTER TABLE jobs_table ADD COLUMN {column_name} {data_type}")
+
+        connection.commit()
+
+
+def delete_all_opportunity_type(opp_type: str) -> None:
+    """Deletes all opportunities of a specific type for testing purposes only"""
+
+    with instantiate_db_connection() as connection:
+        cursor = connection.cursor()
+
+        cursor.execute("DELETE FROM jobs_table WHERE type = %s", (opp_type,))
+        connection.commit()
+
+
+def reset_processed_status(TABLE_NAME):
+    """Jobs status will be set to _processed = 0 for testing and debugging purposes"""
+
+    with instantiate_db_connection() as connection:
+        cursor = connection.cursor()
+
+        cursor.execute(
+            f"SELECT company, title, location FROM {TABLE_NAME} WHERE processed = 1 LIMIT 5"
+        )
+
+        rows = cursor.fetchall()
+
+        for row in rows:
+            company, title, location = row[:3]
+
+            cursor.execute(
+                f"UPDATE {TABLE_NAME} SET processed = 0 WHERE company = %s AND title = %s AND location = %s",
+                (company, title, location),
+            )
+
+        connection.commit()
diff --git a/opportunity.py b/utility/opportunity.py
similarity index 100%
rename from opportunity.py
rename to utility/opportunity.py
diff --git a/utility/palm.py b/utility/palm.py
new file mode 100644
index 0000000..f2e4606
--- /dev/null
+++ b/utility/palm.py
@@ -0,0 +1,111 @@
+import google.generativeai as palm
+from time import sleep
+import os
+import utils
+from dotenv import load_dotenv
+from typing import List
+import json
+from opportunity import Opportunity
+
+load_dotenv()
+utils.verify_set_env_variables()
+
+
+MAX_RETRY = 5  # Max number of retries
+palm.configure(api_key=os.getenv("PALM_API_KEY"))
+
+
+def current_model_inuse() -> any:
+    """Returns the model in use"""
+
+    models = [
+        m
+        for m in palm.list_models()
+        if "generateText" in m.supported_generation_methods
+    ]
+
+    model = models[0].name
+
+    return model
+
+
+def parse_gpt_values(gpt_response) -> List[bool]:
+    """Helper function to parse the gpt response from a str -> List[bool]"""
+
+    response: List[bool]
+
+    for _ in range(MAX_RETRY):
+        try:
+            response = json.loads(gpt_response.lower())
+            break
+        except AttributeError:
+            sleep(0.5)
+
+    return response
+
+
+def filter_out_opportunities(list_of_opps, gpt_response) -> List[Opportunity]:
+    """Helper function for gpt_job_analyzer() to filter the data"""
+
+    structured_opps = [
+        opp for opp, response in zip(list_of_opps, gpt_response) if response
+    ]
+
+    print(f"Length after GPT analyzed the jobs: {len(structured_opps)}")
+    return structured_opps
+
+
+def get_parsed_values(prompt) -> List[bool]:
+    """Function which returns parsed values if the opportunity matches with the club's values"""
+
+    defaults = {
+        "model": "models/text-bison-001",
+        "temperature": 0.0,
+        "candidate_count": 1,
+        "top_k": 100,
+        "top_p": 0.95,
+        "max_output_tokens": 3072,
+        "stop_sequences": [],
+        "safety_settings": [
+            {"category": "HARM_CATEGORY_DEROGATORY", "threshold": 3},
+            {"category": "HARM_CATEGORY_TOXICITY", "threshold": 3},
+            {"category": "HARM_CATEGORY_VIOLENCE", "threshold": 3},
{"category": "HARM_CATEGORY_SEXUAL", "threshold": 3}, + {"category": "HARM_CATEGORY_MEDICAL", "threshold": 3}, + {"category": "HARM_CATEGORY_DANGEROUS", "threshold": 3}, + ], + } + + completion = palm.generate_text(**defaults, prompt=prompt) + + parsed_values = parse_gpt_values(completion.result) + return parsed_values + + +def gpt_job_analyze(list_of_opps: List[Opportunity], prompt: str) -> List[Opportunity]: + """Analyzes each job opportunity before being inserted into the DB""" + + print(f"The jobs original length before filtering: {len(list_of_opps)}") + + for opp in list_of_opps: + prompt += f"\nCompany: {opp.company}" + prompt += f"\nTitle: {opp.title}" + prompt += f"\nLocation: {opp.location}" + prompt += "\n" + + parsed_values = [] + for _ in range(MAX_RETRY): # Keep looping until a valid prompt is received + try: + parsed_values = get_parsed_values(prompt) + break + except ( + json.decoder.JSONDecodeError + ): # The type of error that would be received is type JSON + sleep(0.5) + + print(f" Below are the parsed values from GPT\n {parsed_values}") + print(parsed_values) # For debugging purposes + + return filter_out_opportunities( + list_of_opps, parsed_values + ) # Returns filtered out opportunities diff --git a/utility/scrap.py b/utility/scrap.py new file mode 100644 index 0000000..5df3e79 --- /dev/null +++ b/utility/scrap.py @@ -0,0 +1,141 @@ +from utility.opportunity import Opportunity, OpportunityType +from typing import List +import utils +import os +from dotenv import load_dotenv +import re +import requests + +load_dotenv() +utils.verify_set_env_variables() + +MAX_OPPORTUNITY_LIST_LENGTH = 13 + +# ----------------- INTERNSHIP DATA ----------------- + + +def request_github_internship24_data() -> List[Opportunity]: + """Scrapes Internship Data '24 from Github Repo""" + + github_list = [] + + url = os.getenv("GH_INTERN24_URL") + parse_content = utils.content_parser(url) + td_elems = parse_content.find_all("tr") + + for cell in td_elems[1:]: + if len(github_list) <= MAX_OPPORTUNITY_LIST_LENGTH: + elements = cell.find_all("td") + + company = elements[0].text + title = elements[1].text + location = elements[2].text + link = elements[3] + if "🔒" not in link.text: + opportunity = Opportunity( + company, + title, + location, + link.find("a")["href"], + 0, + OpportunityType.INTERNSHIP.value, + ) + github_list.append(opportunity) + + return github_list + + +def request_linkedin_internship24_data() -> List[Opportunity]: + """Web scrapes Summer '24 Internship Opportunities using LinkedIn""" + + url = os.getenv("LINKEDIN_INTERN_URL") + parse_content = utils.content_parser(url) + + linkedin_internship_opps = utils.blueprint_opportunity_formatter( + parse_content, + "base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card", + "hidden-nested-link", + "base-search-card__title", + "job-search-card__location", + "base-card__full-link", + True, + MAX_OPPORTUNITY_LIST_LENGTH, + OpportunityType.INTERNSHIP.value, + ) + + return linkedin_internship_opps + + +# ----------------- JOB DATA ----------------- + + +def request_rapidapi_indeed_data() -> List[Opportunity]: + """ + This API call retrieves a formatted response object + and returns a List[Opportunity] as the result + """ + + url = os.getenv("RAPID_API_URL") + rapid_api_key = os.getenv("RAPID_API_KEY") + + headers = { + "X-RapidAPI-Key": rapid_api_key, + "X-RapidAPI-Host": "indeed12.p.rapidapi.com", + } + + rapid_jobs = [] + response = requests.get(url, 
+
+    days_needed_command_value = utils.extract_command_value().days_needed[
+        0
+    ]  # Extracts command-line value
+
+    for elem in response["hits"]:
+        time = elem["formatted_relative_time"]
+
+        numeric = re.search(r"\d+", time)
+        formatted_time_integer = int(numeric.group()) if numeric else 0
+
+        if (
+            len(rapid_jobs) < MAX_OPPORTUNITY_LIST_LENGTH
+            and int(days_needed_command_value) >= formatted_time_integer
+        ):
+            company = elem["company_name"]
+            title = elem["title"]
+            location = elem["location"]
+            link = f'https://www.indeed.com/viewjob?jk={elem["id"]}&locality=us'
+            processed = 0
+
+            opportunity = Opportunity(
+                company,
+                title,
+                location,
+                link,
+                processed,
+                OpportunityType.FULL_TIME.value,
+            )
+
+            rapid_jobs.append(opportunity)
+
+    return rapid_jobs
+
+
+def request_linkedin_data() -> List[Opportunity]:
+    """Returns a List[Opportunity] which contains web scraped job content"""
+
+    url = os.getenv("LINKEDIN_URL")
+    parse_content = utils.content_parser(url)
+
+    linked_in_jobs = utils.blueprint_opportunity_formatter(
+        parse_content,
+        "base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card",
+        "hidden-nested-link",
+        "base-search-card__title",
+        "job-search-card__location",
+        "base-card__full-link",
+        True,
+        MAX_OPPORTUNITY_LIST_LENGTH,
+        OpportunityType.FULL_TIME.value,
+    )
+
+    return linked_in_jobs
diff --git a/utility.py b/utility/utils.py
similarity index 60%
rename from utility.py
rename to utility/utils.py
index e91d4da..698bfcb 100644
--- a/utility.py
+++ b/utility/utils.py
@@ -3,9 +3,7 @@
 from typing import List
 import os
 import argparse
-from time import sleep
 import json
-import google.generativeai as palm
 from bs4 import BeautifulSoup
 from opportunity import Opportunity
 from blocklist import BlockList
@@ -37,6 +35,27 @@ def extract_command_value():
     return arguments
 
 
+def verify_set_env_variables() -> any:
+    """Determines if the env variables are all set properly"""
+
+    env_variables = [
+        "LINKEDIN_URL",
+        "DISCORD_WEBHOOK",
+        "DB_URI",
+        "DB_TABLE",
+        "PALM_API_KEY",
+        "GH_INTERN24_URL",
+        "LINKEDIN_INTERN_URL",
+        "PROMPTS_PATH",
+        "MESSAGE_PATH",
+    ]
+
+    # Checks to see if the env variables in env_variables
+    # all exist in the current variables
+    if not set(os.environ).issuperset(env_variables):
+        raise EnvironmentError("One or more env variables are not set.")
+
+
 def calculate_day_difference(elem: datetime) -> int:
     """Calculates day difference for job posting times to the relevant day today"""
 
@@ -150,106 +169,3 @@ def determine_customized_message(message: dict) -> str:
 
     file_message = json.loads(message)[0]
     return file_message["Message"] if file_message["Message"] else default
-
-
-# ----------------- PALM API -----------------
-
-
-MAX_RETRY = 5  # Max number of retrys
-palm.configure(api_key=os.getenv("PALM_API_KEY"))
-
-
-def current_model_inuse() -> any:
-    """Returns the model in use"""
-
-    models = [
-        m
-        for m in palm.list_models()
-        if "generateText" in m.supported_generation_methods
-    ]
-
-    model = models[0].name
-
-    return model
-
-
-def parse_gpt_values(gpt_response) -> List[bool]:
-    """Helper function to parse the gpt response from a str -> List[bool]"""
-
-    response: List[bool]
-
-    for _ in range(MAX_RETRY):
-        try:
-            response = json.loads(gpt_response.lower())
-            break
-        except AttributeError:
-            sleep(0.5)
-
-    return response
-
-
-def filter_out_opportunities(list_of_opps, gpt_response) -> List[Opportunity]:
-    """Helper function for gpt_job_analyzer() to filter the data"""
data""" - - structured_opps = [ - opp for opp, response in zip(list_of_opps, gpt_response) if response - ] - - print(f"Length after GPT analyzed the jobs: {len(structured_opps)}") - return structured_opps - - -def get_parsed_values(prompt) -> List[bool]: - """Function which returns parsed values if the opportunity mathces with the clubs values""" - - defaults = { - "model": "models/text-bison-001", - "temperature": 0.0, - "candidate_count": 1, - "top_k": 100, - "top_p": 0.95, - "max_output_tokens": 3072, - "stop_sequences": [], - "safety_settings": [ - {"category": "HARM_CATEGORY_DEROGATORY", "threshold": 3}, - {"category": "HARM_CATEGORY_TOXICITY", "threshold": 3}, - {"category": "HARM_CATEGORY_VIOLENCE", "threshold": 3}, - {"category": "HARM_CATEGORY_SEXUAL", "threshold": 3}, - {"category": "HARM_CATEGORY_MEDICAL", "threshold": 3}, - {"category": "HARM_CATEGORY_DANGEROUS", "threshold": 3}, - ], - } - - completion = palm.generate_text(**defaults, prompt=prompt) - - parsed_values = parse_gpt_values(completion.result) - return parsed_values - - -def gpt_job_analyze(list_of_opps: List[Opportunity], prompt: str) -> List[Opportunity]: - """Analyzes each job opportunity before being inserted into the DB""" - - print(f"The jobs original length before filtering: {len(list_of_opps)}") - - for opp in list_of_opps: - prompt += f"\nCompany: {opp.company}" - prompt += f"\nTitle: {opp.title}" - prompt += f"\nLocation: {opp.location}" - prompt += "\n" - - parsed_values = [] - for _ in range(MAX_RETRY): # Keep looping until a valid prompt is received - try: - parsed_values = get_parsed_values(prompt) - break - except ( - json.decoder.JSONDecodeError - ): # The type of error that would be received is type JSON - sleep(0.5) - - print(f" Below are the parsed values from GPT\n {parsed_values}") - print(parsed_values) # For debugging purposes - - return filter_out_opportunities( - list_of_opps, parsed_values - ) # Returns filtered out opportunities From 133f201a50009e11e913ea086ea0c1d4f72c9a96 Mon Sep 17 00:00:00 2001 From: Boushra Bettir <116927138+boushrabettir@users.noreply.github.com> Date: Mon, 25 Sep 2023 12:31:55 -0700 Subject: [PATCH 2/5] Update scrap->scrape. 
---
 utility/{scrap.py => scrape.py} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename utility/{scrap.py => scrape.py} (99%)

diff --git a/utility/scrap.py b/utility/scrape.py
similarity index 99%
rename from utility/scrap.py
rename to utility/scrape.py
index 5df3e79..288b012 100644
--- a/utility/scrap.py
+++ b/utility/scrape.py
@@ -9,7 +9,7 @@
 load_dotenv()
 utils.verify_set_env_variables()
 
-MAX_OPPORTUNITY_LIST_LENGTH = 13
+MAX_OPPORTUNITY_LIST_LENGTH = 15
 
 # ----------------- INTERNSHIP DATA -----------------
 

From a6a7c59e06beec7ff7556613bb63fc0aaf2ee48e Mon Sep 17 00:00:00 2001
From: Boushra Bettir <116927138+boushrabettir@users.noreply.github.com>
Date: Mon, 25 Sep 2023 12:32:07 -0700
Subject: [PATCH 3/5] Update import

---
 main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.py b/main.py
index 39de0f2..ce7074e 100644
--- a/main.py
+++ b/main.py
@@ -7,7 +7,7 @@
 import utility.db as db
 import utility.opportunity as opps
 from dotenv import load_dotenv
-from utility.scrap import (
+from utility.scrape import (
     request_github_internship24_data,
     request_linkedin_data,
     request_linkedin_internship24_data,

From 8c7e0162ca1eb92cb6bdaba18e24717e1aeacd80 Mon Sep 17 00:00:00 2001
From: Boushra Bettir <116927138+boushrabettir@users.noreply.github.com>
Date: Fri, 6 Oct 2023 22:31:45 -0700
Subject: [PATCH 4/5] Update imports

---
 main.py                              | 24 ++++++++++++------------
 blocklist.py => utility/blocklist.py |  5 ++++-
 utility/opportunity.py               |  2 +-
 utility/palm.py                      |  4 ++--
 utility/scrape.py                    |  2 +-
 utility/utils.py                     |  4 ++--
 6 files changed, 22 insertions(+), 19 deletions(-)
 rename blocklist.py => utility/blocklist.py (66%)

diff --git a/main.py b/main.py
index ce7074e..22cae04 100644
--- a/main.py
+++ b/main.py
@@ -3,7 +3,7 @@
 import json
 import asyncio
 from datetime import date
-from utility import utils
+import utility.utils as ut
 import utility.db as db
 import utility.opportunity as opps
 from dotenv import load_dotenv
@@ -17,7 +17,7 @@
 
 # Load and determine if all env variables are set
 load_dotenv()
-utils.verify_set_env_variables()
+ut.verify_set_env_variables()
 
 
 async def execute_opportunities_webhook(webhook_url, job_message, internship_message):
@@ -73,7 +73,7 @@ async def execute_opportunities_webhook(webhook_url, job_message, internship_mes
 
 async def main():
     # Creates table in database
-    with_create_table_command = utils.extract_command_value().create
+    with_create_table_command = ut.extract_command_value().create
     if with_create_table_command:
         TABLE_NAME = os.getenv("DB_TABLE")
 
@@ -83,18 +83,18 @@
         exit()  # Exit the main function to avoid calling other functions
 
     file_paths = [os.getenv("MESSAGE_PATH"), os.getenv("PROMPTS_PATH")]
-    customized_object = utils.user_customization(file_paths)
+    customized_object = ut.user_customization(file_paths)
 
     # Determines the customized prompts for PaLM
-    prompt_object = utils.determine_prompts(customized_object["customized_prompts"])
+    prompt_object = ut.determine_prompts(customized_object["customized_prompts"])
 
     # Determines the customized message for the webhook
-    finalized_message = utils.determine_customized_message(
+    finalized_message = ut.determine_customized_message(
         customized_object["customized_message"]
     )
 
     # Consolidates all job-related opportunities into a comprehensive List[Opportunity], eliminating repetitive calls to the LLM SERVER.
-    job_opps = utils.merge_all_opportunity_data(request_linkedin_data())
- job_opps = utils.merge_all_opportunity_data(request_linkedin_data()) + job_opps = ut.merge_all_opportunity_data(request_linkedin_data()) filtered_job_opps = gpt_job_analyze( job_opps, @@ -103,7 +103,7 @@ async def main(): opps.ingest_opportunities(filtered_job_opps) # Consolidates all job-related opportunities into a comprehensive List[Opportunity], eliminating repetitive calls to the LLM SERVER. - internship_opps = utils.merge_all_opportunity_data( + internship_opps = ut.merge_all_opportunity_data( request_linkedin_internship24_data(), request_github_internship24_data(), ) @@ -138,9 +138,9 @@ async def main(): discord_webhook, job_formatted_message, internship_formatted_message ) - opps.update_opportunities_status(job_data_results) - opps.update_opportunities_status(internship_data_results) + # opps.update_opportunities_status(job_data_results) + # opps.update_opportunities_status(internship_data_results) -# if __name__ == "__main__": -# asyncio.run(main()) +if __name__ == "__main__": + asyncio.run(main()) diff --git a/blocklist.py b/utility/blocklist.py similarity index 66% rename from blocklist.py rename to utility/blocklist.py index cf9c7ea..be7052c 100644 --- a/blocklist.py +++ b/utility/blocklist.py @@ -2,7 +2,10 @@ class BlockList: """A class holding methods to determine if a company is blocklisted""" BLOCKLISTED_COMPANIES = set( - ["Pattern Learning AI - Career & Tech Recruitment Reimagined!"] + [ + "Pattern Learning AI - Career & Tech Recruitment Reimagined!", + "Patterned Learning AI - Tech Recruitment & Staffing", + ] ) def is_blacklisted_company(self, company: str) -> bool: diff --git a/utility/opportunity.py b/utility/opportunity.py index bbc90cf..b86a1fa 100644 --- a/utility/opportunity.py +++ b/utility/opportunity.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from dotenv import load_dotenv from typing import List -import db +import utility.db as db from enum import Enum import os diff --git a/utility/palm.py b/utility/palm.py index f2e4606..dc478a9 100644 --- a/utility/palm.py +++ b/utility/palm.py @@ -1,11 +1,11 @@ import google.generativeai as palm from time import sleep import os -import utils +import utility.utils as utils from dotenv import load_dotenv from typing import List import json -from opportunity import Opportunity +from utility.opportunity import Opportunity load_dotenv() utils.verify_set_env_variables() diff --git a/utility/scrape.py b/utility/scrape.py index 288b012..15fd870 100644 --- a/utility/scrape.py +++ b/utility/scrape.py @@ -1,6 +1,6 @@ from utility.opportunity import Opportunity, OpportunityType from typing import List -import utils +import utility.utils as utils import os from dotenv import load_dotenv import re diff --git a/utility/utils.py b/utility/utils.py index d74f6e9..ba2001c 100644 --- a/utility/utils.py +++ b/utility/utils.py @@ -5,8 +5,8 @@ import argparse import json from bs4 import BeautifulSoup -from opportunity import Opportunity -from blocklist import BlockList +from utility.opportunity import Opportunity +from utility.blocklist import BlockList # ----------------- FOR CLI LIBRARY COMMAND ----------------- From d21614b7b41d354c42a3a91fb4c9352e7887b89a Mon Sep 17 00:00:00 2001 From: Boushra Bettir <116927138+boushrabettir@users.noreply.github.com> Date: Sat, 7 Oct 2023 20:14:24 -0700 Subject: [PATCH 5/5] Update types + debug message. 
---
 main.py                | 8 +++++---
 utility/db.py          | 4 ++--
 utility/opportunity.py | 2 +-
 utility/palm.py        | 19 ++++++++++++-------
 4 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/main.py b/main.py
index 22cae04..eb13b98 100644
--- a/main.py
+++ b/main.py
@@ -20,7 +20,9 @@
 ut.verify_set_env_variables()
 
 
-async def execute_opportunities_webhook(webhook_url, job_message, internship_message):
+async def execute_opportunities_webhook(
+    webhook_url: str, job_message: str, internship_message: str
+):
     """
     Executes the message which receives the formatted message
     from the format_opportunities() function as well as the webhook
@@ -138,8 +140,8 @@ async def main():
         discord_webhook, job_formatted_message, internship_formatted_message
     )
 
-    # opps.update_opportunities_status(job_data_results)
-    # opps.update_opportunities_status(internship_data_results)
+    opps.update_opportunities_status(job_data_results)
+    opps.update_opportunities_status(internship_data_results)
 
 
 if __name__ == "__main__":
diff --git a/utility/db.py b/utility/db.py
index 7371f07..20fe0f6 100644
--- a/utility/db.py
+++ b/utility/db.py
@@ -9,7 +9,7 @@ def instantiate_db_connection():
     return psycopg2.connect(db_uri)
 
 
-def create(TABLE_NAME: str):
+def create(TABLE_NAME: str) -> None:
     """Creates the DB. Only needs to be called once."""
 
     with instantiate_db_connection() as connection:
@@ -43,7 +43,7 @@ def delete_all_opportunity_type(opp_type: str) -> None:
         connection.commit()
 
 
-def reset_processed_status(TABLE_NAME):
+def reset_processed_status(TABLE_NAME: str) -> None:
     """Jobs status will be set to _processed = 0 for testing and debugging purposes"""
 
     with instantiate_db_connection() as connection:
diff --git a/utility/opportunity.py b/utility/opportunity.py
index b86a1fa..1385543 100644
--- a/utility/opportunity.py
+++ b/utility/opportunity.py
@@ -32,7 +32,7 @@ class Opportunity:
 table_name = os.getenv("DB_TABLE")
 
 
-def ingest_opportunities(job_data):
+def ingest_opportunities(job_data: List[Opportunity]) -> None:
     """Inserts opportunities if and only if they do not already exist"""
     with db.instantiate_db_connection() as connection:
         cursor = connection.cursor()
diff --git a/utility/palm.py b/utility/palm.py
index dc478a9..170b9de 100644
--- a/utility/palm.py
+++ b/utility/palm.py
@@ -29,7 +29,7 @@ def current_model_inuse() -> any:
     return model
 
 
-def parse_gpt_values(gpt_response) -> List[bool]:
+def parse_gpt_values(gpt_response: str) -> List[bool]:
     """Helper function to parse the gpt response from a str -> List[bool]"""
 
     response: List[bool]
@@ -44,18 +44,22 @@ def parse_gpt_values(gpt_response) -> List[bool]:
     return response
 
 
-def filter_out_opportunities(list_of_opps, gpt_response) -> List[Opportunity]:
+def filter_out_opportunities(
+    list_of_opps: List[Opportunity], gpt_response: List[bool]
+) -> List[Opportunity]:
     """Helper function for gpt_job_analyzer() to filter the data"""
 
     structured_opps = [
         opp for opp, response in zip(list_of_opps, gpt_response) if response
     ]
 
-    print(f"Length after GPT analyzed the jobs: {len(structured_opps)}")
+    print(
+        f"Length after GPT analyzed the {list_of_opps[0].type}: {len(structured_opps)}"
+    )
     return structured_opps
 
 
-def get_parsed_values(prompt) -> List[bool]:
+def get_parsed_values(prompt: str) -> List[bool]:
     """Function which returns parsed values if the opportunity matches with the club's values"""
 
     defaults = {
@@ -85,7 +89,9 @@ def get_parsed_values(prompt) -> List[bool]:
 def gpt_job_analyze(list_of_opps: List[Opportunity], prompt: str) -> List[Opportunity]:
     """Analyzes each job opportunity before being inserted into the DB"""
 
-    print(f"The jobs original length before filtering: {len(list_of_opps)}")
+    print(
+        f"The type '{list_of_opps[0].type}' original length before filtering: {len(list_of_opps)}"
+    )
 
     for opp in list_of_opps:
         prompt += f"\nCompany: {opp.company}"
@@ -103,8 +109,7 @@
         ):  # The error received would be a JSON decode error
             sleep(0.5)
 
-    print(f" Below are the parsed values from GPT\n {parsed_values}")
-    print(parsed_values)  # For debugging purposes
+    print(f" Below are the parsed values from GPT - {parsed_values}")
 
     return filter_out_opportunities(
         list_of_opps, parsed_values
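
After PATCH 5, every module lives under utility/ and main.py is the sole entry point. For anyone verifying the series locally, a minimal smoke-test sketch of the reorganized layout follows; it is illustrative only: it assumes the repository root is on PYTHONPATH and a populated .env, and the table name "jobs_table" and the prompt string are placeholders, not values defined by these patches.

    # Sketch: exercising the post-refactor package from a scratch script.
    from dotenv import load_dotenv

    import utility.utils as ut
    import utility.db as db
    from utility.scrape import request_linkedin_data
    from utility.palm import gpt_job_analyze

    # Mirror main.py's startup: load .env, then fail fast on missing variables.
    load_dotenv()
    ut.verify_set_env_variables()

    db.create("jobs_table")  # placeholder name; main.py reads DB_TABLE instead

    # Scrape LinkedIn postings and let PaLM filter them against a prompt.
    jobs = gpt_job_analyze(request_linkedin_data(), "example prompt")  # placeholder prompt
    print(f"{len(jobs)} opportunities kept after filtering")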