Merge pull request #95 from nollied/cleanup
Added new modules and methods, updated version number, and added new dependencies and imports.
steegecs authored Mar 9, 2023
2 parents 25d77e0 + aa1c3c3 commit db2a0c3
Showing 24 changed files with 315 additions and 111 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -78,8 +78,8 @@ Make some changes to your git repo and stage them. Then, run `mf commit`! You sh
1 file changed, 14 insertions(+)
```

### Create PRs With GPT Titles And Body
Make some changes to your branch and stage, and then commit them. Then, run `mf pr`! A PR should be created with a title and body generated by GPT, and a link to the PR should be printed to the console.
### Create PRs/MRs With GPT Titles And Body
Make some changes to your branch and stage, and then commit them. Then, run `mf pr` for GitHub or `mf mr` for GitLab! A pull request/merge request should be created with a title and body generated by GPT, and a link to the PR should be printed to the console.
- To use this feature, you must first install and authenticate the [GitHub CLI](https://cli.github.com/).

## How does it work?
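As a quick illustration of the updated README flow, a typical session might look like the snippet below. The branch name and file edits are placeholders; `mf commit` and `mf pr` are the commands documented above, and GitLab users would substitute `mf mr`.

```
git checkout -b my-feature
# ...edit some files...
git add .
mf commit        # GPT-generated commit message
mf pr            # GPT-generated PR title and body (GitLab: mf mr)
```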
2 changes: 1 addition & 1 deletion mindflow/__init__.py
@@ -1 +1 @@
__version__ = "0.3.12"
__version__ = "0.3.13"
20 changes: 18 additions & 2 deletions mindflow/core/chat.py
@@ -1,19 +1,35 @@
from typing import Optional
from mindflow.settings import Settings
from mindflow.utils.constants import MinimumReservedLength
from mindflow.utils.prompts import CHAT_PROMPT_PREFIX
from mindflow.utils.token import get_token_count


def run_chat(prompt: str) -> str:
"""
Build a chat prompt from the user's input and send it to the GPT model.
"""
settings = Settings()
completion_model = settings.mindflow_models.query.model

if (
get_token_count(completion_model, CHAT_PROMPT_PREFIX + prompt)
> completion_model.hard_token_limit - MinimumReservedLength.CHAT.value
):
print("The prompt is too long. Please try again with a shorter prompt.")
return ""

# Prompt GPT through Mindflow API or locally
response: str = settings.mindflow_models.query.model(
response: Optional[str] = completion_model(
[
{
"role": "system",
"content": "You are a helpful virtual assistant responding to a users query using your general knowledge and the text provided below.",
"content": CHAT_PROMPT_PREFIX,
},
{"role": "user", "content": prompt},
]
)

if response is None:
return "Unable to generate response. Please try again. If the problem persists, please raise an issue at: https://github.com/nollied/mindflow-cli/issues."
return response
7 changes: 5 additions & 2 deletions mindflow/core/git/commit.py
@@ -1,8 +1,9 @@
import subprocess
from typing import Tuple, Optional
from typing import Tuple, Optional, Union

from mindflow.core.git.diff import run_diff
from mindflow.settings import Settings
from mindflow.utils.errors import ModelError
from mindflow.utils.prompt_builders import build_context_prompt
from mindflow.utils.prompts import COMMIT_PROMPT_PREFIX

@@ -20,9 +21,11 @@ def run_commit(args: Tuple[str], message_overwrite: Optional[str] = None) -> str
if diff_output == "No staged changes.":
return diff_output

response: str = settings.mindflow_models.query.model(
response: Union[ModelError, str] = settings.mindflow_models.query.model(
build_context_prompt(COMMIT_PROMPT_PREFIX, diff_output)
)
if isinstance(response, ModelError):
return response.commit_message

# add co-authorship to commit message
response += "\n\nCo-authored-by: MindFlow <[email protected]>"
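The commit.py hunk above is the first of several call sites switching to a `Union[ModelError, str]` return convention: the model call returns either a string or a `ModelError`, and the caller branches on `isinstance`. A minimal, self-contained sketch of that pattern follows; the `ModelError` fields and the `call_model` helper are stand-ins inferred from this diff, not the actual `mindflow.utils.errors` or model code.

```python
from dataclasses import dataclass
from typing import Union


@dataclass
class ModelError:
    """Hypothetical stand-in for mindflow.utils.errors.ModelError."""
    detail: str

    @property
    def commit_message(self) -> str:
        # User-facing message for the `mf commit` path.
        return f"Unable to generate a commit message: {self.detail}"


def call_model(prompt: str) -> Union[ModelError, str]:
    # Stand-in for settings.mindflow_models.query.model(...)
    if not prompt:
        return ModelError("empty prompt")
    return "Refactor diff batching and error handling"


def run_commit_sketch(diff_output: str) -> str:
    response = call_model(diff_output)
    if isinstance(response, ModelError):
        return response.commit_message
    return response + "\n\nCo-authored-by: MindFlow <[email protected]>"
```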
81 changes: 57 additions & 24 deletions mindflow/core/git/diff.py
@@ -3,17 +3,19 @@
"""
import concurrent.futures
import subprocess
from typing import Dict
from typing import Dict, Union
from typing import List
from typing import Optional
from typing import Tuple

from mindflow.db.objects.model import ConfiguredModel
from mindflow.settings import Settings
from mindflow.utils.constants import MinimumReservedLength
from mindflow.utils.errors import ModelError
from mindflow.utils.prompt_builders import build_context_prompt
from mindflow.utils.prompts import GIT_DIFF_PROMPT_PREFIX

from mindflow.utils.diff_parser import parse_git_diff, IGNORE_FILE_EXTENSIONS
from mindflow.utils.diff_parser import parse_git_diff
from mindflow.utils.token import get_token_count


def run_diff(args: Tuple[str]) -> str:
@@ -35,18 +37,19 @@ def run_diff(args: Tuple[str]) -> str:
if len(diff_dict) <= 0:
return "No staged changes."

batched_parsed_diff_result = batch_git_diffs(
diff_dict, token_limit=completion_model.hard_token_limit
)
batched_parsed_diff_result = batch_git_diffs(diff_dict, completion_model)

response: str = ""
diff_summary: str = ""
if len(batched_parsed_diff_result) == 1:
content = ""
for file_name, diff_content in batched_parsed_diff_result[0]:
content += f"*{file_name}*\n DIFF CONTENT: {diff_content}\n\n"
response = completion_model(
diff_response: Union[ModelError, str] = completion_model(
build_context_prompt(GIT_DIFF_PROMPT_PREFIX, content)
)
if isinstance(diff_response, ModelError):
return diff_response.diff_message
diff_summary += diff_response
else:
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = []
@@ -62,43 +65,73 @@ def run_diff(args: Tuple[str]) -> str:

# Process the results as they become available
for future in concurrent.futures.as_completed(futures):
response += future.result()
diff_partial_response: Union[ModelError, str] = future.result()
if isinstance(diff_partial_response, ModelError):
return diff_partial_response.diff_partial_message

diff_summary += diff_partial_response

if len(excluded_filenames) > 0:
response += f"\n\nNOTE: The following files were excluded from the diff: {', '.join(excluded_filenames)}"
diff_summary += f"\n\nNOTE: The following files were excluded from the diff: {', '.join(excluded_filenames)}"

return response
return diff_summary


import re


def batch_git_diffs(
file_diffs: Dict[str, str], token_limit: int
file_diffs: Dict[str, str], model: ConfiguredModel
) -> List[List[Tuple[str, str]]]:
batches = []
current_batch: List = []
current_batch_size = 0
current_batch_text = ""
for file_name, diff_content in file_diffs.items():
if len(diff_content) > token_limit:
chunks = [
diff_content[i : i + token_limit]
for i in range(0, len(diff_content), token_limit)
]
if (
get_token_count(model, diff_content)
> model.hard_token_limit - MinimumReservedLength.DIFF.value
):
## Split the diff into chunks that are less than the token limit
chunks = [diff_content]
while True:
new_chunks = []
for chunk in chunks:
if (
get_token_count(model, chunk)
> model.hard_token_limit - MinimumReservedLength.DIFF.value
):
half_len = len(chunk) // 2
left_half = chunk[:half_len]
right_half = chunk[half_len:]
new_chunks.extend([left_half, right_half])
else:
new_chunks.append(chunk)
if new_chunks == chunks:
break
chunks = new_chunks

## Add the chunks to the batch or multiple batches
for chunk in chunks:
if current_batch_size + len(chunk) > token_limit * 2:
if (
get_token_count(model, current_batch_text + chunk)
> model.hard_token_limit - MinimumReservedLength.DIFF.value
):
batches.append(current_batch)
current_batch = []
current_batch_size = 0
current_batch_text = ""
current_batch.append((file_name, chunk))
current_batch_size += len(chunk)
elif current_batch_size + len(diff_content) > token_limit * 2:
current_batch_text += chunk

elif (
get_token_count(model, current_batch_text + diff_content)
> model.hard_token_limit - MinimumReservedLength.DIFF.value
):
batches.append(current_batch)
current_batch = [(file_name, diff_content)]
current_batch_size = len(diff_content)
current_batch_text = diff_content
else:
current_batch.append((file_name, diff_content))
current_batch_size += len(diff_content)
current_batch_text += diff_content
if current_batch:
batches.append(current_batch)
return batches
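To summarize the new `batch_git_diffs` strategy in isolation: any file diff whose token count exceeds the budget is repeatedly halved until every piece fits, and the pieces are then packed greedily into batches that stay under the budget. The sketch below uses a rough 4-characters-per-token estimate in place of the model-aware `get_token_count`, so the numbers are illustrative only.

```python
from typing import List


def estimate_tokens(text: str) -> int:
    # Rough stand-in for get_token_count(model, text).
    return len(text) // 4


def halve_until_fits(text: str, budget: int) -> List[str]:
    # Keep splitting oversized chunks in half until every chunk fits the budget.
    chunks = [text]
    while True:
        new_chunks: List[str] = []
        for chunk in chunks:
            if estimate_tokens(chunk) > budget:
                mid = len(chunk) // 2
                new_chunks.extend([chunk[:mid], chunk[mid:]])
            else:
                new_chunks.append(chunk)
        if new_chunks == chunks:
            return chunks
        chunks = new_chunks


def pack_into_batches(diffs: List[str], budget: int) -> List[List[str]]:
    # Greedily pack chunks into batches whose combined size stays under the budget.
    batches: List[List[str]] = []
    current: List[str] = []
    current_text = ""
    for diff in diffs:
        for chunk in halve_until_fits(diff, budget):
            if current and estimate_tokens(current_text + chunk) > budget:
                batches.append(current)
                current, current_text = [], ""
            current.append(chunk)
            current_text += chunk
    if current:
        batches.append(current)
    return batches
```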
7 changes: 6 additions & 1 deletion mindflow/core/git/mr.py
@@ -32,7 +32,12 @@ def run_mr(
return

if not title or not description:
title, description = create_title_and_body(base_branch, title, description)
title_description_tuple = create_title_and_body(base_branch, title, description)

if not title_description_tuple:
return

title, description = title_description_tuple

create_merge_request(args, title, description)

30 changes: 23 additions & 7 deletions mindflow/core/git/pr.py
@@ -1,10 +1,11 @@
import concurrent.futures
import subprocess
from typing import List, Optional, Tuple
from typing import List, Optional, Tuple, Union

from mindflow.core.git.diff import run_diff
from mindflow.settings import Settings
from mindflow.utils.command_parse import get_flag_value
from mindflow.utils.errors import ModelError
from mindflow.utils.prompt_builders import build_context_prompt
from mindflow.utils.prompts import PR_BODY_PREFIX
from mindflow.utils.prompts import PR_TITLE_PREFIX
@@ -35,8 +36,12 @@ def run_pr(args: Tuple[str], title: Optional[str] = None, body: Optional[str] =
return

if not title or not body:
title, body = create_title_and_body(base_branch, title, body)
title_body_tuple = create_title_and_body(base_branch, title, body)

if not title_body_tuple:
return

title, body = title_body_tuple
create_pull_request(args, title, body)


@@ -58,11 +63,13 @@ def is_valid_pr(head_branch: str, base_branch: str) -> bool:

def create_title_and_body(
base_branch, title: Optional[str], body: Optional[str]
) -> Tuple[str, str]:
) -> Optional[Tuple[str, str]]:
settings = Settings()

diff_output = run_diff((base_branch,))

title_response: Union[ModelError, str]
body_response: Union[ModelError, str]
if title is None and body is None:
pr_title_prompt = build_context_prompt(PR_TITLE_PREFIX, diff_output)
pr_body_prompt = build_context_prompt(PR_BODY_PREFIX, diff_output)
@@ -75,16 +82,25 @@ def create_title_and_body(
settings.mindflow_models.query.model, pr_body_prompt
)

title = future_title.result()
body = future_body.result()
title_response = future_title.result()
body_response = future_body.result()
else:
if title is None:
pr_title_prompt = build_context_prompt(PR_TITLE_PREFIX, diff_output)
title = settings.mindflow_models.query.model(pr_title_prompt)
title_response = settings.mindflow_models.query.model(pr_title_prompt)
if body is None:
pr_body_prompt = build_context_prompt(PR_BODY_PREFIX, diff_output)
body = settings.mindflow_models.query.model(pr_body_prompt)
body_response = settings.mindflow_models.query.model(pr_body_prompt)

if isinstance(title_response, ModelError):
print(title_response.pr_message)
return None
if isinstance(body_response, ModelError):
print(body_response.pr_message)
return None

title = title if title is not None else title_response
body = body if body is not None else body_response
return title, body


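For readability, here is the concurrent portion of `create_title_and_body` reduced to a standalone sketch: when neither a title nor a body was supplied, both prompts are submitted to a thread pool and the results are collected before the `ModelError` checks shown above. `call_model` and the prompt strings are placeholders for `settings.mindflow_models.query.model` and `build_context_prompt(...)`.

```python
import concurrent.futures
from typing import Tuple


def call_model(prompt: str) -> str:
    # Placeholder for settings.mindflow_models.query.model(prompt).
    return f"model output for: {prompt.splitlines()[0]}"


def generate_title_and_body(diff_output: str) -> Tuple[str, str]:
    title_prompt = "PR TITLE PREFIX\n" + diff_output  # stands in for build_context_prompt(PR_TITLE_PREFIX, diff_output)
    body_prompt = "PR BODY PREFIX\n" + diff_output    # stands in for build_context_prompt(PR_BODY_PREFIX, diff_output)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_title = executor.submit(call_model, title_prompt)
        future_body = executor.submit(call_model, body_prompt)
        return future_title.result(), future_body.result()
```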
38 changes: 22 additions & 16 deletions mindflow/core/index.py
@@ -1,10 +1,10 @@
"""
`generate` command
"""
from asyncio import Future
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy
from typing import List
import logging
from typing import List, Union
from typing import Optional

import numpy as np
@@ -18,8 +18,10 @@
from mindflow.resolving.resolve import resolve_all
from mindflow.resolving.resolve import return_if_indexable
from mindflow.settings import Settings
from mindflow.utils.errors import ModelError
from mindflow.utils.prompt_builders import build_context_prompt
from mindflow.utils.prompts import INDEX_PROMPT_PREFIX
from mindflow.utils.token import get_batch_token_count, get_token_count


def run_index(document_paths: List[str], refresh: bool, force: bool) -> None:
@@ -97,9 +99,14 @@ def __init__(
self.start = start
self.end = end
if text:
self.summary = completion_model(
response: Union[str, ModelError] = completion_model(
build_context_prompt(INDEX_PROMPT_PREFIX, text)
)
if isinstance(response, ModelError):
self.summary = ""
print(response.index_message)
else:
self.summary = response

def set_leaves(self, leaves: List["Node"]) -> None:
self.leaves = leaves
@@ -136,15 +143,6 @@ def iterative_to_dict(self) -> dict:
return node_dict


def count_tokens(text: str) -> int:
"""
Counts/estimates the number of tokens this text will consume by GPT.
"""
# tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
# count = len(tokenizer(text)['input_ids'])
return len(text) // 4 # Token Estimation for speed


# This function is used to split a string into chunks of a specified token limit using binary search
def binary_split_raw_text_to_nodes(
completion_model: ConfiguredModel, text: str
@@ -156,7 +154,10 @@ def binary_split_raw_text_to_nodes(
stack = [(0, len(text))]
while stack:
start, end = stack.pop()
if count_tokens(text[start:end]) < completion_model.soft_token_limit:
if (
get_token_count(completion_model, text[start:end])
< completion_model.soft_token_limit
):
nodes.append(Node(completion_model, start, end, text[start:end]))
else:
mid = ((end - start) // 2) + start
@@ -176,7 +177,9 @@ def binary_split_nodes_to_chunks(
while stack:
nodes, start, end = stack.pop()
if (
sum(count_tokens(node.summary) for node in nodes[start:end])
get_batch_token_count(
completion_model, [node.summary for node in nodes[start:end]]
)
< completion_model.soft_token_limit
):
chunks.append(nodes[start:end])
@@ -195,7 +198,10 @@ def create_nodes(completion_model: ConfiguredModel, leaf_nodes: List[Node]) -> N
while stack:
leaf_nodes, start, end = stack.pop()
if (
sum(count_tokens(leaf_node.summary) for leaf_node in leaf_nodes[start:end])
get_batch_token_count(
completion_model,
[leaf_node.summary for leaf_node in leaf_nodes[start:end]],
)
> completion_model.soft_token_limit
):
node_chunks: List[List[Node]] = binary_split_nodes_to_chunks(
@@ -222,7 +228,7 @@ def create_text_search_tree(completion_model: ConfiguredModel, text: str) -> dic
"""
This function is used to create a tree of responses from the OpenAI API
"""
if count_tokens(text) < completion_model.soft_token_limit:
if get_token_count(completion_model, text) < completion_model.soft_token_limit:
return Node(completion_model, 0, len(text), text).to_dict()

return create_nodes(
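The index changes replace the old `count_tokens` length heuristic with `get_token_count` and `get_batch_token_count` from `mindflow.utils.token`, which this diff does not show. A plausible sketch of such helpers, using the `tiktoken` tokenizer, is given below; this is an assumption about the implementation, not the actual mindflow code, and it takes a model name string where the real helpers take a `ConfiguredModel`.

```python
from typing import List

import tiktoken


def get_token_count_sketch(model_name: str, text: str) -> int:
    # Fall back to a general-purpose encoding if the model name is unknown.
    try:
        encoding = tiktoken.encoding_for_model(model_name)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text))


def get_batch_token_count_sketch(model_name: str, texts: List[str]) -> int:
    # Sum of per-text token counts; sufficient for budget checks like the ones above.
    return sum(get_token_count_sketch(model_name, text) for text in texts)
```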