Skip to content

Commit

Permalink
Merge pull request #1337 from Devparihar5/fix-github
Browse files Browse the repository at this point in the history
fix: GitHubLoader to handle binary files
  • Loading branch information
dartpain authored Oct 26, 2024
2 parents c44ff77 + d3238de commit bea0cca
Showing 1 changed file with 11 additions and 6 deletions.
17 changes: 11 additions & 6 deletions application/parser/remote/github_loader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import List
from application.parser.remote.base import BaseRemote
from langchain_core.documents import Document
import mimetypes

class GitHubLoader(BaseRemote):
def __init__(self):
Expand All @@ -18,13 +19,17 @@ def fetch_file_content(self, repo_url: str, file_path: str) -> str:

if response.status_code == 200:
content = response.json()
mime_type, _ = mimetypes.guess_type(file_path) # Guess the MIME type based on the file extension

if content.get("encoding") == "base64":
try:
decoded_content = base64.b64decode(content["content"]).decode("utf-8")
return f"Filename: {file_path}\n\n{decoded_content}"
except Exception as e:
print(f"Error decoding content for {file_path}: {e}")
raise
if mime_type and mime_type.startswith("text"): # Handle only text files
try:
decoded_content = base64.b64decode(content["content"]).decode("utf-8")
return f"Filename: {file_path}\n\n{decoded_content}"
except Exception as e:
raise e
else:
return f"Filename: {file_path} is a binary file and was skipped."
else:
return f"Filename: {file_path}\n\n{content['content']}"
else:
Expand Down

0 comments on commit bea0cca

Please sign in to comment.