diff --git a/app/main.py b/app/main.py index a105921..add5fe4 100644 --- a/app/main.py +++ b/app/main.py @@ -4,7 +4,7 @@ from fastapi import FastAPI from starlette.exceptions import HTTPException -from app.router.chatbot_article_detail_router import chatbot_article_router +from app.router.chatbot_article_router import chatbot_article_router from app.config.exception_handler import exception_handler, http_exception_handler from app.config.middlewares.request_response_logging_middle_ware import ( LoggingMiddleware, diff --git a/app/model/crawled_article.py b/app/model/crawled_article.py index d174537..544eefa 100644 --- a/app/model/crawled_article.py +++ b/app/model/crawled_article.py @@ -24,12 +24,6 @@ class Articles(Base): category = Column(CHAR(255), nullable=True) published_at = Column(DateTime, nullable=True) image_url = Column(String, nullable=True) - probability_issue_finder = Column(Integer, nullable=True) - probability_lifestyle_consumer = Column(Integer, nullable=True) - probability_entertainer = Column(Integer, nullable=True) - probability_tech_specialist = Column(Integer, nullable=True) - probability_professionals = Column(Integer, nullable=True) - @event.listens_for(Articles, "before_update", propagate=True) def update_timestamp(mapper, connection, target): # pylint: disable=unused-argument diff --git a/app/model/prompt/simple_article/2024-07-14.txt b/app/model/prompt/simple_article/2024-07-14.txt index ee8b9f7..79d563b 100644 --- a/app/model/prompt/simple_article/2024-07-14.txt +++ b/app/model/prompt/simple_article/2024-07-14.txt @@ -4,18 +4,18 @@ 다음은 json 형식의 예시이다: {{ "title": "MZ세대가 흥미를 끌만한 기사 제목(한국어)", - "content": "기사 본문 (한국어). 단, 경제 기사의 독자층이 경제 지식이 부족한 20대 초반인 것을 고려하여 적당한 이모지를 사용하여 친근하고 간결하게 설명할 것. 문단은 \n으로 구분할 것.", + "content": "기사 본문 (한국어). 단, 경제 기사의 독자층이 경제 지식이 부족한 20대 초반인 것을 고려하여 적당한 이모지를 사용하여 친근하게 재생성할 것. 
문단은 \n으로 구분할 것.", "phrase": {{"어려웠던 경제 표현들" : "어려웠던 경제 표현들을 쉽게 바꾼 문구"}} (예시: {{"환율" : "다른 나라 돈과 우리나라 돈을 교환하는 비율"}}), "comment": "기사를 보고 추론할 수 있는 것 1문장을 친구에게 설명하는 듯한 표현으로", "category": "Category 중 하나" }} enum Category: - ECONOMY_AND_BUSINESS = "경제 및 기업" - POLITICS_AND_SOCIETY = "정치 및 사회" - TECHNOLOGY_AND_CULTURE = "기술 및 문화" - SPORTS_AND_LEISURE = "스포츠 및 여가" - OPINION_AND_ANALYSIS = "오피니언 및 분석" + ECONOMY_AND_BUSINESS + POLITICS_AND_SOCIETY + TECHNOLOGY_AND_CULTURE + SPORTS_AND_LEISURE + OPINION_AND_ANALYSIS 결과는 json 형식이어야 한다. diff --git a/app/model/subscription.py b/app/model/subscription.py index dbf678e..b38209c 100644 --- a/app/model/subscription.py +++ b/app/model/subscription.py @@ -7,11 +7,11 @@ class MailTypeCategory(Enum): - ECONOMY_AND_BUSINESS = "경제 및 기업" - POLITICS_AND_SOCIETY = "정치 및 사회" - TECHNOLOGY_AND_CULTURE = "기술 및 문화" - SPORTS_AND_LEISURE = "스포츠 및 여가" - OPINION_AND_ANALYSIS = "오피니언 및 분석" + ECONOMY_AND_BUSINESS = "ECONOMY_AND_BUSINESS" # 경제와 비즈니스 + POLITICS_AND_SOCIETY = "POLITICS_AND_SOCIETY" # 정치와 사회 + TECHNOLOGY_AND_CULTURE = "TECHNOLOGY_AND_CULTURE" # 기술과 문화 + SPORTS_AND_LEISURE = "SPORTS_AND_LEISURE" # 스포츠와 여가 + OPINION_AND_ANALYSIS = "OPINION_AND_ANALYSIS" # 의견과 분석 class Subscription(Base): diff --git a/app/rag_lang_chain/langchain_applied.py b/app/rag_lang_chain/langchain_applied.py index bc9474f..dd7e387 100644 --- a/app/rag_lang_chain/langchain_applied.py +++ b/app/rag_lang_chain/langchain_applied.py @@ -6,6 +6,7 @@ from langchain.schema import Document from langchain_core.messages import HumanMessage from langchain_openai import ChatOpenAI +from pydantic.v1 import BaseModel from app.config.loguru_config import logger from app.rag_lang_chain.chromadb_manager import ChromaDBManager @@ -15,9 +16,9 @@ ) -class RagAppliedResult: +class RagAppliedResult(BaseModel): result_text: str - related_documents: List[Union[Document, dict]] + related_documents: List[Document] async def request_rag_applied_openai( @@ -34,11 +35,7 @@ async def 
request_rag_applied_openai( ) # Step 1: Google Custom Search API를 사용하여 관련 정보 수집 - google_results = await google_cse_retriever.retrieve( - original_text - ) # FIXME: 왜 GoogleCSERetriever를 사용하는가? # pylint: disable=fixme - if not google_results: - raise HTTPException(status_code=404, detail="No results found from Google.") + google_results = await google_cse_retriever.retrieve(original_text) # Step 2: 검색 결과를 벡터화하고 ChromaDB에 저장 chroma_db_manager = ChromaDBManager() @@ -49,14 +46,17 @@ async def request_rag_applied_openai( additional_info = await search.aget_relevant_documents(original_text, num_results=3) # Step 4: 프롬프트 생성(원문 + 검색 결과 + 추가 정보) - rag_applied_prompt = await create_rag_applied_prompt( - original_prompt=system_prompt, relevant_info=search_results + additional_info - ) + rag_applied_prompt = system_prompt + if search_results: + rag_applied_prompt = await create_rag_applied_prompt( + original_prompt=system_prompt, + relevant_info=search_results + additional_info, + ) # Step 5: OpenAI 요청 결과 반환 try: search_llm = ChatOpenAI( - temperature=0, model="gpt-4", max_tokens=1500, api_key=openai_api_key + temperature=0, model="gpt-4o", max_tokens=1500, api_key=openai_api_key ) response = await search_llm.agenerate( messages=[[HumanMessage(rag_applied_prompt)]] @@ -70,11 +70,9 @@ async def request_rag_applied_openai( logger.info(f"Response: {response.generations[0][0].text}") - # response.generations[0][0].text - return RagAppliedResult( result_text=response.generations[0][0].text, - related_documents=search_results + additional_info, + related_documents=search_results, ) diff --git a/app/recommend/__init__.py b/app/recommend/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/repository/crawled_article_crud.py b/app/repository/crawled_article_crud.py index 70fe5aa..442b11f 100644 --- a/app/repository/crawled_article_crud.py +++ b/app/repository/crawled_article_crud.py @@ -21,22 +21,6 @@ async def get(self, pk: int, session: AsyncSession): ) return 
article - async def set_interest_type( - self, pk:int, interest_types : List[int], session: AsyncSession - ): - repository = get_repository(Articles)(session) - return await repository.update_by_pk( - pk = pk, - data = { - 'probability_issue_finder': interest_types[0], - 'probability_lifestyle_consumer': interest_types[1], - 'probability_entertainer': interest_types[2], - 'probability_tech_specialist': interest_types[3], - 'probability_professionals': interest_types[4] - } - - ) - async def get_all(self, session: AsyncSession): repository = get_repository(Articles)(session) return await repository.filter() diff --git a/app/router/chatbot_article_detail_router.py b/app/router/chatbot_article_router.py similarity index 55% rename from app/router/chatbot_article_detail_router.py rename to app/router/chatbot_article_router.py index 56b01d6..632754c 100644 --- a/app/router/chatbot_article_detail_router.py +++ b/app/router/chatbot_article_router.py @@ -1,26 +1,30 @@ -from fastapi import APIRouter +from fastapi import APIRouter, Depends from pydantic import BaseModel -from app.service.chatbot_article_detail_service import request_rag_applied_openai +from sqlalchemy.ext.asyncio import AsyncSession +from app.database.session import get_db_session +from app.service.chatbot_article_service import request_rag_applied_openai from app.utils.generic_response import GenericResponseDTO chatbot_article_router = APIRouter() # 사용자 요청 class GenerateDetailArticleRequestDTO(BaseModel): - news_content: str + id : int prompt: str @chatbot_article_router.post( - "/chatbot-article-detail", response_model=GenericResponseDTO + "/chatbot/article", response_model=GenericResponseDTO ) async def chatbot_article_detail_( request: GenerateDetailArticleRequestDTO, + session: AsyncSession = Depends(get_db_session) ): rag_applied_result = await request_rag_applied_openai( - original_text=request.news_content, - system_prompt=request.prompt + news_id =request.id, + system_prompt=request.prompt, + 
session = session ) return GenericResponseDTO( diff --git a/app/service/article_manage_service.py b/app/service/article_manage_service.py index c039497..c3d10ae 100644 --- a/app/service/article_manage_service.py +++ b/app/service/article_manage_service.py @@ -1,11 +1,14 @@ from datetime import datetime from typing import List + from sqlalchemy.ext.asyncio import AsyncSession + from app.model.article_publisher import Publisher from app.model.crawled_article import Articles from app.model.subscription import MailTypeCategory from app.repository.crawled_article_crud import CrawledArticleRepository + class ArticleManageService: async def create_article( self, @@ -18,7 +21,7 @@ async def create_article( phrase: dict, comment: str, image_url: str, - published_at: str, + published_at: datetime, category: MailTypeCategory, session: AsyncSession, ) -> Articles: @@ -31,11 +34,10 @@ async def create_article( simple_title=simple_title, simple_content=simple_content, comment=comment, - published_at=datetime.strptime(published_at, '%Y-%m-%dT%H:%M:%S'), + published_at=published_at, image_url=image_url, category=category.name, phrase=phrase, - probability_issue_finder=-1 ), session=session, ) diff --git a/app/service/chatbot_article_detail_service.py b/app/service/chatbot_article_service.py similarity index 74% rename from app/service/chatbot_article_detail_service.py rename to app/service/chatbot_article_service.py index df0f175..24df398 100644 --- a/app/service/chatbot_article_detail_service.py +++ b/app/service/chatbot_article_service.py @@ -3,7 +3,9 @@ import aiohttp from fastapi import HTTPException from langchain_core.messages import HumanMessage +from langchain_core.outputs import LLMResult from langchain_openai import ChatOpenAI +from sqlalchemy.ext.asyncio import AsyncSession from app.config.loguru_config import logger from app.rag_lang_chain.chromadb_manager import ChromaDBManager @@ -11,6 +13,7 @@ AsyncGoogleSearchAPIWrapper, GoogleCSERetriever, ) +from 
app.service.article_manage_service import ArticleManageService from langchain.schema import Document @@ -40,8 +43,9 @@ def to_dict(self) -> Dict: async def request_rag_applied_openai( - original_text: str, # OriginalText: 기사 원문(Google Custom Search에 보낼 용도) - system_prompt: str, # SystemPrompt: 시스템 프롬프트 + news_id: int, + system_prompt: str, + session : AsyncSession ) -> Dict: openai_api_key = os.getenv("OPENAI_API_KEY") google_api_key = os.getenv("GOOGLE_API_KEY") @@ -52,14 +56,26 @@ async def request_rag_applied_openai( api_key=google_api_key, cse_id=google_cse_id ) + # Step 0 : 기사 id값에 따른 기사 원문 가져오기 + article_service = ArticleManageService() + article_by_id = await article_service.get_article_by_id(news_id, session) + original_text = article_by_id.content + if not original_text: + raise HTTPException(status_code=404, detail="Article not found.") + + # Step 1: Google Custom Search API를 사용하여 사용자가 입력한 original_text 관련 정보 전부 수집 # original_text와 관련된 웹 페이지의 목록을 반환함. 각 웹 페이지는 title(검색 결과 제목), link(웹 페이지 url), snippet(검색 결과의 요약)으로 구성됨. google_results = await google_cse_retriever.retrieve( original_text ) - if not google_results: - raise HTTPException(status_code=404, detail="No results found from Google.") logger.info(f"1. Google results: {google_results}") + if not google_results: # no Google hits: skip RAG and answer from the bare prompt + response = await openai_response(openai_api_key, system_prompt) + return RagAppliedResult( + result_text=response.generations[0][0].text, + related_documents=[], + ).to_dict() # Step 2: 검색 결과를 벡터화하고 ChromaDB에 저장 chroma_db_manager = ChromaDBManager() @@ -71,20 +87,35 @@ async def request_rag_applied_openai( additional_info = await search.aget_relevant_documents(original_text, num_results=3) logger.info(f"3. 
Additional info: {additional_info}") - # Step 4: 프롬프트 생성(원문 + 검색 결과 + 추가 정보) - + # Step 4: 프롬프트 생성 (원문 + 검색 결과 + 추가 정보) rag_applied_prompt = await create_rag_applied_prompt( - original_prompt=system_prompt, relevant_info=search_results + additional_info + original_prompt=system_prompt, relevant_info=search_results + additional_info, original_text=original_text ) # Step 5: OpenAI 요청 결과 반환 + response = await openai_response(openai_api_key, rag_applied_prompt) + + logger.info(f"최종 Response: {response}") + + return RagAppliedResult( + result_text=response.generations[0][0].text, + related_documents=search_results + additional_info, + ).to_dict() + + +# OpenAI 요청 결과 반환 +async def openai_response( + openai_api_key: str, + prompt: str +) -> LLMResult: try: search_llm = ChatOpenAI( temperature=0, model="gpt-4", max_tokens=1500, api_key=openai_api_key ) response = await search_llm.agenerate( - messages=[[HumanMessage(rag_applied_prompt)]] + messages=[[HumanMessage(prompt)]] ) + return response except aiohttp.ClientResponseError as e: if e.status == 429: raise HTTPException( @@ -92,17 +123,15 @@ async def request_rag_applied_openai( ) from e raise HTTPException(500, "Internal Server Error") from e - logger.info(f"최종 Response: {response}") - - return RagAppliedResult( - result_text=response.generations[0][0].text, - related_documents=search_results + additional_info, - ).to_dict() async def create_rag_applied_prompt( - original_prompt: str, relevant_info: List[Union[Document, dict]] + original_prompt: str, relevant_info: List[Union[Document, dict]], original_text: str ) -> str: + # 원문 기사 추가 + original_prompt += f"\n원문 기사:\n{original_text}\n\n" + + # 관련 정보 추가 for idx, info in enumerate(relevant_info): if isinstance(info, Document): title = info.metadata.get("title", "제목 없음") @@ -113,7 +142,7 @@ async def create_rag_applied_prompt( link = info.get("link", "URL 없음") snippet = info.get("snippet", "내용 없음") original_prompt += ( - f"\n{idx + 1}. 
제목: {title}\n URL: {link}\n 내용: {snippet}\n" + f"{'관련된 기사 정보들 ' if idx == 0 else ''}\n{idx + 1}. 제목: {title}\n URL: {link}\n 내용: {snippet}\n" # header once, before the first entry ) logger.info(f"RAG Applied Prompt: {original_prompt}") diff --git a/app/service/news_scheduling_service.py b/app/service/news_scheduling_service.py index 169ce1a..8dab876 100644 --- a/app/service/news_scheduling_service.py +++ b/app/service/news_scheduling_service.py @@ -8,6 +8,7 @@ from app.config.loguru_config import logger from app.database.session import db_session from app.model.article_publisher import Publisher +from app.recommend.recommend_service import RecommendService from app.service.article_manage_service import ArticleManageService from app.service.simple_article_service import process_generate_article_by_url @@ -87,7 +88,6 @@ async def run_crawl_and_store(session: AsyncSession): ) - async def schedule_task(): while True: now = datetime.now() diff --git a/app/service/simple_article_service.py b/app/service/simple_article_service.py index a95909b..06f7d8c 100644 --- a/app/service/simple_article_service.py +++ b/app/service/simple_article_service.py @@ -1,3 +1,5 @@ +from datetime import datetime + from sqlalchemy.ext.asyncio import AsyncSession from app.model.article_publisher import find_publisher @@ -35,13 +37,16 @@ async def process_generate_article_by_url( if not ai_result.get("comment") or not ai_result["comment"].strip(): raise ValueError("댓글이 비어 있거나 누락되었습니다") if ai_result.get("category") not in [ - category.value for category in MailTypeCategory + category.name for category in MailTypeCategory ]: raise ValueError(f"유효하지 않은 카테고리입니다: {ai_result.get('category')}") # JSON 객체인 ai_result를 simplified_article 객체로 변환 simplified_article = SimplifiedArticle(**ai_result) + published_at_datetime = datetime.fromisoformat(request_text.pub_date).replace( + tzinfo=None + ) # DB에 저장 await ArticleManageService().create_article( url=url, @@ -52,7 +57,7 @@ async def process_generate_article_by_url( simple_content=simplified_article.content, 
phrase=simplified_article.phrase, comment=simplified_article.comment, - published_at=request_text.pub_date, + published_at=published_at_datetime, image_url=request_text.image_url, category=MailTypeCategory(ai_result["category"]), session=session,