Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge development into main #244

Merged
merged 20 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
c3d091b
fix: the cosine similarity is evaluated for top comments and bot comm…
gentlementlegen Dec 26, 2024
8d65638
chore: fixed cspell words
gentlementlegen Dec 26, 2024
3cbd808
chore: only curated comments are taken into account to build the prom…
gentlementlegen Dec 29, 2024
a0695e3
chore: token limit is configurable through the configuration file
gentlementlegen Dec 29, 2024
70705b7
test: fixed configuration test
gentlementlegen Dec 29, 2024
538d177
chore: throw on missing token limit
gentlementlegen Jan 4, 2025
93308f4
fix: long prompts are now split by n-chunks and averaged after evalua…
gentlementlegen Jan 4, 2025
5e1c5d8
chore: removed unused package natural
gentlementlegen Jan 4, 2025
5ba0013
chore: updated manifest.json and dist build
github-actions[bot] Jan 4, 2025
b13cae5
chore: removed cspell word
gentlementlegen Jan 4, 2025
80df742
chore: fixed repo payload
gentlementlegen Jan 8, 2025
3c42275
chore: fixed ref url
gentlementlegen Jan 8, 2025
4e544c5
Merge branch 'development' into fix/tfidf
gentlementlegen Jan 8, 2025
9ac8052
fix: the comment splitting now processes differently pull-request and…
gentlementlegen Jan 8, 2025
32a2d09
chore: updated manifest.json and dist build
github-actions[bot] Jan 8, 2025
9bcf29d
chore: bump sdk package version
gentlementlegen Jan 11, 2025
862306f
chore: updated manifest.json and dist build
github-actions[bot] Jan 11, 2025
46a9539
Merge branch 'development' into fix/tfidf
gentlementlegen Jan 16, 2025
62bf7eb
chore: updated manifest.json and dist build
github-actions[bot] Jan 16, 2025
a1f4541
Merge pull request #225 from gentlementlegen/fix/tfidf
gentlementlegen Jan 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ with:
openAi:
model: "gpt-4o"
endpoint: "https://api.openai.com/v1"
tokenCountLimit: 124000
multipliers:
- role: [ISSUE_SPECIFICATION]
relevance: 1
Expand Down
Binary file modified bun.lockb
Binary file not shown.
4 changes: 2 additions & 2 deletions dist/index.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/index.js.map

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@
"examples": ["gpt-4o"],
"type": "string"
},
"tokenCountLimit": {
"default": 124000,
"description": "Token count limit for a given model. If the content goes beyond the token limit, content will get truncated during evaluation.",
"examples": [124000],
"type": "integer"
},
"endpoint": {
"default": "https://api.openai.com/v1",
"pattern": "^(https?:\\/\\/[^\\s$.?#].\\S*)$",
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"@supabase/supabase-js": "2.42.0",
"@ubiquity-dao/rpc-handler": "1.3.0",
"@ubiquity-os/permit-generation": "^2.0.6",
"@ubiquity-os/plugin-sdk": "^2.0.0",
"@ubiquity-os/plugin-sdk": "^2.0.1",
"@ubiquity-os/ubiquity-os-logger": "^1.3.2",
"decimal.js": "10.4.3",
"ethers": "^5.7.2",
Expand Down
6 changes: 6 additions & 0 deletions src/configuration/content-evaluator-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ const openAiType = Type.Object(
description: "OpenAI model, e.g. gpt-4o",
examples: ["gpt-4o"],
}),
tokenCountLimit: Type.Integer({
default: 124000,
description:
"Token count limit for a given model. If the content goes beyond the token limit, content will get truncated during evaluation.",
examples: [124000],
}),
/**
* Specific endpoint to send the comments to.
*/
Expand Down
123 changes: 102 additions & 21 deletions src/parser/content-evaluator-module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,13 @@ export class ContentEvaluatorModule extends BaseModule {

async transform(data: Readonly<IssueActivity>, result: Result) {
const promises: Promise<GithubCommentScore[]>[] = [];
const allCommentsUnClean = data.allComments || [];
const allComments: { id: number; comment: string; author: string }[] = [];
for (const commentObj of allCommentsUnClean) {
if (commentObj.user) {
allComments.push({ id: commentObj.id, comment: commentObj.body ?? "", author: commentObj.user.login });

for (const [user, data] of Object.entries(result)) {
if (data.comments?.length) {
allComments.push(
...data.comments.map((comment) => ({ id: comment.id, comment: comment.content, author: user }))
);
}
}

Expand Down Expand Up @@ -165,29 +167,108 @@ export class ContentEvaluatorModule extends BaseModule {
return { commentsToEvaluate, prCommentsToEvaluate };
}

async _evaluateComments(
/**
 * Partitions `array` into `chunks` consecutive slices of near-equal size.
 *
 * Sizes are balanced by recomputing `ceil(remaining / slicesLeft)` on every
 * pass, so e.g. 5 items over 2 chunks yields [3, 2]. The input array is not
 * mutated; a shallow copy is consumed instead.
 *
 * @param array - comments to partition (issue or PR comment collections).
 * @param chunks - number of slices to produce.
 * @returns an array of `chunks` consecutive slices covering `array`.
 */
_splitArrayToChunks<T extends CommentToEvaluate[] | AllComments>(array: T, chunks: number) {
  const remaining = [...array];
  const slices = [];
  for (let slicesLeft = chunks; slicesLeft > 0; slicesLeft--) {
    slices.push(remaining.splice(0, Math.ceil(remaining.length / slicesLeft)));
  }
  return slices;
}

/**
 * Evaluates user issue comments when the full prompt would exceed the token
 * limit. The surrounding context (`allComments`) is split into chunks; the
 * same user comments are scored once against each chunk, and the per-chunk
 * scores are averaged.
 *
 * @param specification - the issue specification text.
 * @param comments - the user comments being scored (sent with every chunk).
 * @param allComments - full conversation context, partitioned across chunks.
 * @returns averaged relevance score per comment id.
 */
async _splitPromptForIssueCommentEvaluation(
  specification: string,
  comments: CommentToEvaluate[],
  allComments: AllComments
) {
  const commentRelevances: Relevances = {};
  const chunks = 2;

  // The expected response shape depends only on `comments`, which is the same
  // for every chunk, so the reply token budget is loop-invariant and is
  // computed once instead of per iteration.
  const dummyResponse = JSON.stringify(this._generateDummyResponse(comments), null, 2);
  const maxTokens = this._calculateMaxTokens(dummyResponse);

  for (const commentSplit of this._splitArrayToChunks(allComments, chunks)) {
    const promptForComments = this._generatePromptForComments(specification, comments, commentSplit);

    for (const [key, value] of Object.entries(await this._submitPrompt(promptForComments, maxTokens))) {
      // Every chunk returns a score for every user comment; accumulate them.
      commentRelevances[key] = new Decimal(commentRelevances[key] ?? 0).add(value).toNumber();
    }
  }
  // Each comment was scored once per chunk, so divide by the chunk count.
  for (const key of Object.keys(commentRelevances)) {
    commentRelevances[key] = new Decimal(commentRelevances[key]).div(chunks).toNumber();
  }

  return commentRelevances;
}

/**
 * Evaluates pull-request comments when the full prompt would exceed the token
 * limit. Unlike the issue-comment path, the comments THEMSELVES are
 * partitioned across chunks, so each comment is scored exactly once.
 *
 * Fix: the previous implementation divided every accumulated score by the
 * chunk count, but `_splitArrayToChunks` produces a partition — each comment
 * id appears in exactly one chunk's response — so every PR-comment relevance
 * was incorrectly halved. Disjoint chunk results are now merged as-is.
 *
 * @param specification - the issue specification text.
 * @param comments - the PR comments being scored, partitioned across chunks.
 * @returns relevance score per comment id.
 */
async _splitPromptForPullRequestCommentEvaluation(specification: string, comments: PrCommentToEvaluate[]) {
  const commentRelevances: Relevances = {};
  const chunks = 2;

  for (const commentSplit of this._splitArrayToChunks(comments, chunks)) {
    // The reply token budget depends on the chunk being scored, so it is
    // recomputed per chunk here (unlike the issue-comment path).
    const dummyResponse = JSON.stringify(this._generateDummyResponse(commentSplit), null, 2);
    const maxTokens = this._calculateMaxTokens(dummyResponse);
    const promptForComments = this._generatePromptForPrComments(specification, commentSplit);

    // Chunks score disjoint subsets of comments, so results merge directly
    // with no averaging needed.
    Object.assign(commentRelevances, await this._submitPrompt(promptForComments, maxTokens));
  }

  return commentRelevances;
}

async _evaluateComments(
specification: string,
userIssueComments: CommentToEvaluate[],
allComments: AllComments,
prComments: PrCommentToEvaluate[]
userPrComments: PrCommentToEvaluate[]
): Promise<Relevances> {
let commentRelevances: Relevances = {};
let prCommentRelevances: Relevances = {};

if (comments.length) {
const dummyResponse = JSON.stringify(this._generateDummyResponse(comments), null, 2);
if (!this._configuration?.openAi.tokenCountLimit) {
throw this.context.logger.fatal("Token count limit is missing, comments cannot be evaluated.");
}

const tokenLimit = this._configuration?.openAi.tokenCountLimit;

if (userIssueComments.length) {
const dummyResponse = JSON.stringify(this._generateDummyResponse(userIssueComments), null, 2);
const maxTokens = this._calculateMaxTokens(dummyResponse);

const promptForComments = this._generatePromptForComments(specification, comments, allComments);
commentRelevances = await this._submitPrompt(promptForComments, maxTokens);
const promptForIssueComments = this._generatePromptForComments(specification, userIssueComments, allComments);
if (this._calculateMaxTokens(promptForIssueComments, Infinity) > tokenLimit) {
commentRelevances = await this._splitPromptForIssueCommentEvaluation(
specification,
userIssueComments,
allComments
);
} else {
commentRelevances = await this._submitPrompt(promptForIssueComments, maxTokens);
}
}

if (prComments.length) {
const dummyResponse = JSON.stringify(this._generateDummyResponse(prComments), null, 2);
if (userPrComments.length) {
const dummyResponse = JSON.stringify(this._generateDummyResponse(userPrComments), null, 2);
const maxTokens = this._calculateMaxTokens(dummyResponse);

const promptForPrComments = this._generatePromptForPrComments(specification, prComments);
prCommentRelevances = await this._submitPrompt(promptForPrComments, maxTokens);
const promptForPrComments = this._generatePromptForPrComments(specification, userPrComments);
if (this._calculateMaxTokens(promptForPrComments, Infinity) > tokenLimit) {
prCommentRelevances = await this._splitPromptForPullRequestCommentEvaluation(specification, userPrComments);
} else {
prCommentRelevances = await this._submitPrompt(promptForPrComments, maxTokens);
}
}

return { ...commentRelevances, ...prCommentRelevances };
Expand Down Expand Up @@ -229,12 +310,12 @@ export class ContentEvaluatorModule extends BaseModule {
}
}

_generatePromptForComments(issue: string, comments: CommentToEvaluate[], allComments: AllComments) {
_generatePromptForComments(issue: string, userComments: CommentToEvaluate[], allComments: AllComments) {
if (!issue?.length) {
throw new Error("Issue specification comment is missing or empty");
}
const allCommentsMap = allComments.map((value) => `${value.id} - ${value.author}: "${value.comment}"`);
const commentsMap = comments.map((value) => `${value.id}: "${value.comment}"`);
const userCommentsMap = userComments.map((value) => `${value.id}: "${value.comment}"`);
return `
Evaluate the relevance of GitHub comments to an issue. Provide a JSON object with comment IDs and their relevance scores.
Issue: ${issue}
Expand All @@ -243,7 +324,7 @@ export class ContentEvaluatorModule extends BaseModule {
${allCommentsMap.join("\n")}

Comments to evaluate:
${commentsMap.join("\n")}
${userCommentsMap.join("\n")}

Instructions:
1. Read all comments carefully, considering their context and content.
Expand All @@ -264,18 +345,18 @@ export class ContentEvaluatorModule extends BaseModule {
Notes:
- Even minor details may be significant.
- Comments may reference earlier comments.
- The number of entries in the JSON response must equal ${commentsMap.length}.
- The number of entries in the JSON response must equal ${userCommentsMap.length}.
`;
}

_generatePromptForPrComments(issue: string, comments: PrCommentToEvaluate[]) {
_generatePromptForPrComments(issue: string, userComments: PrCommentToEvaluate[]) {
if (!issue?.length) {
throw new Error("Issue specification comment is missing or empty");
}
return `I need to evaluate the value of a GitHub contributor's comments in a pull request. Some of these comments are code review comments, and some are general suggestions or a part of the discussion. I'm interested in how much each comment helps to solve the GitHub issue and improve code quality. Please provide a float between 0 and 1 to represent the value of each comment. A score of 1 indicates that the comment is very valuable and significantly improves the submitted solution and code quality, whereas a score of 0 indicates a negative or zero impact. A stringified JSON is given below that contains the specification of the GitHub issue, and comments by different contributors. The property "diffHunk" presents the chunk of code being addressed for a possible change in a code review comment. \n\n\`\`\`\n${JSON.stringify(
{ specification: issue, comments: comments }
{ specification: issue, comments: userComments }
)}\n\`\`\`\n\n\nTo what degree are each of the comments valuable? Please reply with ONLY a JSON where each key is the comment ID given in JSON above, and the value is a float number between 0 and 1 corresponding to the comment. The float number should represent the value of the comment for improving the issue solution and code quality. The total number of properties in your JSON response should equal exactly ${
comments.length
userComments.length
}.`;
}
}
2 changes: 1 addition & 1 deletion src/web/api/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const baseApp = createPlugin<PluginSettings, EnvConfig, null, SupportedEvents>(
logLevel: (process.env.LOG_LEVEL as LogLevel) ?? "info",
settingsSchema: pluginSettingsSchema,
envSchema: envConfigSchema,
postCommentOnError: true,
postCommentOnError: false,
bypassSignatureVerification: true,
}
);
Expand Down
7 changes: 4 additions & 3 deletions src/web/api/payload.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,17 @@ export async function getPayload(ownerRepo: string, issueId: number, useOpenAi:
const filePath = path.resolve(__dirname, "../.ubiquity-os.config.yml");
const fileContent = await fs.readFile(filePath, "utf8");
const cfgFile = YAML.parse(fileContent);
const owner = ownerRepo.split("/")[0];
const [owner, repo] = ownerRepo.split("/");

if (!useOpenAi) {
cfgFile.incentives.contentEvaluator.openAi = {
...cfgFile.incentives.contentEvaluator.openAi,
endpoint: "http://localhost:4000/openai",
};
}

return {
ref: "development",
ref: "http://localhost",
stateId: "1234",
signature: "",
eventName: "issues.closed",
Expand Down Expand Up @@ -104,7 +105,7 @@ export async function getPayload(ownerRepo: string, issueId: number, useOpenAi:
repository: {
id: 1296269,
node_id: `MDEwOlJlcG9zaXRvcnkxMjk2MjY5`,
name: owner,
name: repo,
full_name: ownerRepo,
owner: {
login: owner,
Expand Down
2 changes: 2 additions & 0 deletions tests/content-evaluator-config.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ describe("ContentEvaluatorConfiguration Validation", () => {
openAi: {
model: "gpt-4o-2024-08-06",
endpoint: "not-a-valid-url",
tokenCountLimit: 0,
},
};

Expand All @@ -27,6 +28,7 @@ describe("ContentEvaluatorConfiguration Validation", () => {
openAi: {
model: "gpt-4o-2024-08-06",
endpoint: "https://api.openai.com/v1",
tokenCountLimit: 100,
},
};

Expand Down
Loading