Skip to content
This repository has been archived by the owner on Jan 7, 2025. It is now read-only.

Changes for 1.0.18 #570

Merged
merged 5 commits into from
Apr 18, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions mentat/code_context.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
from mentat.code_feature import CodeFeature, get_consolidated_feature_refs
from mentat.diff_context import DiffContext
from mentat.errors import PathValidationError
from mentat.git_handler import get_git_root_for_path
from mentat.include_files import (
PathType,
get_code_features_for_path,
@@ -67,6 +68,11 @@ async def refresh_daemon(self):
cwd = ctx.cwd
llm_api_handler = ctx.llm_api_handler

# Use print because stream is not initialized yet
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider using the logging framework instead of print for consistency across the codebase and more control over logging levels and destinations.

print("Scanning codebase for updates...")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The warning message about not being a git repository could potentially be confusing for users if their project is not intended to be a git repository. Consider adding a check to only display this warning if git-related features are being used.

if not get_git_root_for_path(cwd, raise_error=False):
print("\033[93mWarning: Not a git repository (this might take a while)\033[0m")

annotators: dict[str, dict[str, Any]] = {
"hierarchy": {"ignore_patterns": [str(p) for p in self.ignore_patterns]},
"chunker_line": {"lines_per_chunk": 50},
@@ -185,11 +191,15 @@ async def get_code_message(
auto_tokens=auto_tokens,
)
for ref in context_builder.to_refs():
new_features = list[CodeFeature]() # Save ragdaemon context back to include_files
path, interval_str = split_intervals_from_path(Path(ref))
intervals = parse_intervals(interval_str)
for interval in intervals:
feature = CodeFeature(cwd / path, interval)
self.include_features([feature]) # Save ragdaemon context back to include_files
if not interval_str:
new_features.append(CodeFeature(cwd / path))
else:
intervals = parse_intervals(interval_str)
for interval in intervals:
new_features.append(CodeFeature(cwd / path, interval))
self.include_features(new_features)

# The context message is rendered by ragdaemon (ContextBuilder.render())
context_message = context_builder.render()
@@ -417,10 +427,14 @@ async def search(
continue
distance = node["distance"]
path, interval = split_intervals_from_path(Path(node["ref"]))
intervals = parse_intervals(interval)
for _interval in intervals:
feature = CodeFeature(cwd / path, _interval)
if not interval:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's great to see handling for the case where there's no interval specified, simplifying the creation of a CodeFeature for a whole file. This makes the function more robust and easier to understand.

feature = CodeFeature(cwd / path)
all_features_sorted.append((feature, distance))
else:
intervals = parse_intervals(interval)
for _interval in intervals:
feature = CodeFeature(cwd / path, _interval)
all_features_sorted.append((feature, distance))
if max_results is None:
return all_features_sorted
else:
2 changes: 1 addition & 1 deletion mentat/config.py
Original file line number Diff line number Diff line change
@@ -42,7 +42,7 @@ class Config:
)
provider: Optional[str] = attr.field(default=None, metadata={"auto_completions": ["openai", "anthropic", "azure"]})
embedding_model: str = attr.field(
default="text-embedding-ada-002",
default="text-embedding-3-large",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updating the default embedding_model to text-embedding-3-large aligns with the latest models available, potentially offering better performance or features.

metadata={"auto_completions": [model.name for model in models if isinstance(model, EmbeddingModel)]},
)
embedding_provider: Optional[str] = attr.field(
2 changes: 1 addition & 1 deletion mentat/llm_api_handler.py
Original file line number Diff line number Diff line change
@@ -217,7 +217,7 @@ async def call_llm_api(
return response

@api_guard
def call_embedding_api(self, input_texts: list[str], model: str = "text-embedding-ada-002") -> EmbeddingResponse:
def call_embedding_api(self, input_texts: list[str], model: str = "text-embedding-3-large") -> EmbeddingResponse:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updating the default model for embedding API calls to text-embedding-3-large is consistent with the change in config.py. This ensures that the embedding calls use the updated model by default.

ctx = SESSION_CONTEXT.get()
return self.spice.get_embeddings_sync(input_texts, model, provider=ctx.config.embedding_provider)

1 change: 0 additions & 1 deletion mentat/session.py
Original file line number Diff line number Diff line change
@@ -164,7 +164,6 @@ async def _main(self):

await session_context.llm_api_handler.initialize_client()

print("Scanning codebase for updates...")
await code_context.refresh_daemon()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing the print statement here is a good move for cleanliness, especially since the update scanning message has been moved to CodeContext. This helps in keeping the session initialization cleaner.


check_model()