Skip to content

Commit

Permalink
708 move logic for detecting output defence bot filtering (#740)
Browse files Browse the repository at this point in the history
* Renamed method to be clear what defences are being checked

* Moved detection of output defences

* Using await rather than then

* Clearer use of the input defence report

* WIP: openai file doesn't know about the defence report

* WIP: Using new pushMessageToHistory method

* Fixed chat history

* Simpler combining of defence reports

* Consistent blocking rules

* Not mutating chatResponse in the performToolCalls method

* Better loop

* Not mutating chatResponse in the chatGptChatCompletion method

* Simplified return

* Method to add the user messages to chat history

* Better output defence report

* Moved combineChatDefenceReports to chat controller

* No longer exporting getFilterList and detectFilterList

* Fixed test build errors

* detectTriggeredOutputDefences unit tests

* Fixed chat controller tests

* Removed output filtering integration tests

This code is now covered by the unit tests

* Moved utils method to new file

* Fixed remaining tests

* pushMessageToHistory unit tests

* WIP: Now using the updated chat response

* WIP: Fixed chat utils tests

* WIP: Fixed remaining tests

* Fix for response not being set properly

* No longer adding transformed message twice

* Nicer chat while loop

* Only sending back sent emails, not total emails

* Fixed tests

* Using flatMap

* const updatedChatHistory in low level chat

* Constructing chat response at the end of high level chat

Like what is done in low level chat

* Removed wrong comment

* Fixed tests

* Better function name

* Better promise name

* Not setting sent emails if the message was blocked

* refactor chathistory code to reduce mutation

* change test names and add comment

* adds history check to first test

* added second history check

* removed some comments

* correct some tests in integration/chatController.test

* adds unit test for chatController to make sure history is updated properly

* fixes defence trigger tests that were broken by mocks

* refactors reused mocking code

* added unit test to check history update in sandbox

* update first test to include existing history

* makes second test use existing history

* adds comment that points out some weirdness

* polishes off those tests

* fixes weirdness about combining the empty defence report

* fixes problem of not getting updated chat history

* respond to chris - makes chatHistoryWithNewUsermessages more concise

* respond to chris - adds back useful comment

* simplify transformed message ternary expression

* refactors transformMessage and only calls combineTransformedMessage once

---------

Co-authored-by: Peter Marsh <[email protected]>
  • Loading branch information
gsproston-scottlogic and pmarsh-scottlogic authored Jan 18, 2024
1 parent 45e2a41 commit b2f1a42
Show file tree
Hide file tree
Showing 12 changed files with 907 additions and 367 deletions.
215 changes: 129 additions & 86 deletions backend/src/controller/chatController.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@ import { Response } from 'express';

import {
transformMessage,
detectTriggeredDefences,
detectTriggeredInputDefences,
combineTransformedMessage,
detectTriggeredOutputDefences,
} from '@src/defence';
import { OpenAiAddHistoryRequest } from '@src/models/api/OpenAiAddHistoryRequest';
import { OpenAiChatRequest } from '@src/models/api/OpenAiChatRequest';
import { OpenAiClearRequest } from '@src/models/api/OpenAiClearRequest';
import { OpenAiGetHistoryRequest } from '@src/models/api/OpenAiGetHistoryRequest';
import {
CHAT_MESSAGE_TYPE,
ChatDefenceReport,
ChatHistoryMessage,
ChatHttpResponse,
ChatModel,
Expand All @@ -21,27 +23,81 @@ import { Defence } from '@src/models/defence';
import { EmailInfo } from '@src/models/email';
import { LEVEL_NAMES } from '@src/models/level';
import { chatGptSendMessage } from '@src/openai';
import { pushMessageToHistory } from '@src/utils/chat';

import { handleChatError } from './handleError';

/**
 * Merges several defence reports (e.g. the input-detection report and the
 * output-detection report) into a single report for the chat response.
 *
 * @param reports - the individual reports to combine; may be empty.
 * @returns a report that is blocked if any input report is blocked, with all
 *   alerted/triggered defences concatenated. `blockedReason` is the
 *   newline-joined reasons, or `null` when no report carried a reason —
 *   matching the `blockedReason: null` convention used by the initial
 *   chat response elsewhere in this controller.
 */
function combineChatDefenceReports(
	reports: ChatDefenceReport[]
): ChatDefenceReport {
	const blockedReasons = reports
		.filter((report) => report.blockedReason !== null)
		.map((report) => report.blockedReason);

	return {
		// null (not '') when nothing was blocked, so consumers can use a
		// simple null check instead of testing for an empty string
		blockedReason: blockedReasons.length ? blockedReasons.join('\n') : null,
		isBlocked: reports.some((report) => report.isBlocked),
		alertedDefences: reports.flatMap((report) => report.alertedDefences),
		triggeredDefences: reports.flatMap((report) => report.triggeredDefences),
	};
}

/**
 * Builds the chat-history entries for an incoming user message.
 *
 * @param message - the raw message the user typed.
 * @param transformedMessage - the defence-transformed version of the message,
 *   or `null` when no transformation was applied.
 * @returns one entry (the plain user completion) when untransformed; two
 *   entries when transformed — the original as an info-only record plus the
 *   transformed text as the completion that is actually sent to the model.
 */
function createNewUserMessages(
	message: string,
	transformedMessage: string | null
): ChatHistoryMessage[] {
	if (!transformedMessage) {
		// untransformed: record the message as a normal user completion
		return [
			{
				completion: {
					role: 'user',
					content: message,
				},
				chatMessageType: CHAT_MESSAGE_TYPE.USER,
			},
		];
	}

	// transformed: keep the original for display only (no completion), and
	// store the transformed text as the completion the model will see
	const originalEntry: ChatHistoryMessage = {
		completion: null,
		chatMessageType: CHAT_MESSAGE_TYPE.USER,
		infoMessage: message,
	};
	const transformedEntry: ChatHistoryMessage = {
		completion: {
			role: 'user',
			content: transformedMessage,
		},
		chatMessageType: CHAT_MESSAGE_TYPE.USER_TRANSFORMED,
	};
	return [originalEntry, transformedEntry];
}

// handle the chat logic for level 1 and 2 with no defences applied
async function handleLowLevelChat(
message: string,
chatResponse: ChatHttpResponse,
currentLevel: LEVEL_NAMES,
chatModel: ChatModel,
chatHistory: ChatHistoryMessage[],
defences: Defence[],
sentEmails: EmailInfo[]
defences: Defence[]
): Promise<LevelHandlerResponse> {
const updatedChatHistory = createNewUserMessages(message, null).reduce(
pushMessageToHistory,
chatHistory
);

// get the chatGPT reply
const openAiReply = await chatGptSendMessage(
chatHistory,
updatedChatHistory,
defences,
chatModel,
message,
false,
sentEmails,
currentLevel
);

Expand All @@ -65,82 +121,71 @@ async function handleHigherLevelChat(
currentLevel: LEVEL_NAMES,
chatModel: ChatModel,
chatHistory: ChatHistoryMessage[],
defences: Defence[],
sentEmails: EmailInfo[]
defences: Defence[]
): Promise<LevelHandlerResponse> {
let updatedChatHistory = [...chatHistory];
let updatedChatResponse = {
...chatResponse,
};
// transform the message according to active defences
const transformedMessage = transformMessage(message, defences);
const transformedMessageCombined = transformedMessage
? combineTransformedMessage(transformedMessage)
: null;
const chatHistoryWithNewUserMessages = createNewUserMessages(
message,
transformedMessageCombined ?? null
).reduce(pushMessageToHistory, chatHistory);

if (transformedMessage) {
// if message has been transformed then add the original to chat history and send transformed to chatGPT
updatedChatHistory = [
...updatedChatHistory,
{
completion: null,
chatMessageType: CHAT_MESSAGE_TYPE.USER,
infoMessage: message,
},
];

updatedChatResponse = {
...updatedChatResponse,
transformedMessage,
};
}
// detect defences on input message
const triggeredDefencesPromise = detectTriggeredDefences(message, defences);
const triggeredInputDefencesPromise = detectTriggeredInputDefences(
message,
defences
);

// get the chatGPT reply
const openAiReplyPromise = chatGptSendMessage(
updatedChatHistory,
chatHistoryWithNewUserMessages,
defences,
chatModel,
transformedMessage
? combineTransformedMessage(transformedMessage)
: message,
transformedMessage ? true : false,
sentEmails,
transformedMessageCombined ?? message,
currentLevel
);

// run defence detection and chatGPT concurrently
const [defenceReport, openAiReply] = await Promise.all([
triggeredDefencesPromise,
const [inputDefenceReport, openAiReply] = await Promise.all([
triggeredInputDefencesPromise,
openAiReplyPromise,
]);

// if input message is blocked, restore the original chat history and add user message (not as completion)
if (defenceReport.isBlocked) {
updatedChatHistory = [
...updatedChatHistory,
{
const botReply = openAiReply.chatResponse.completion?.content?.toString();
const outputDefenceReport = botReply
? detectTriggeredOutputDefences(botReply, defences)
: null;

const defenceReports = outputDefenceReport
? [inputDefenceReport, outputDefenceReport]
: [inputDefenceReport];
const combinedDefenceReport = combineChatDefenceReports(defenceReports);

// if blocked, restore original chat history and add user message to chat history without completion
const updatedChatHistory = combinedDefenceReport.isBlocked
? pushMessageToHistory(chatHistory, {
completion: null,
chatMessageType: CHAT_MESSAGE_TYPE.USER,
infoMessage: message,
},
];
updatedChatResponse = {
...updatedChatResponse,
defenceReport,
};
} else {
updatedChatHistory = openAiReply.chatHistory;
updatedChatResponse = {
...updatedChatResponse,
reply: openAiReply.chatResponse.completion?.content?.toString() ?? '',
wonLevel: openAiReply.chatResponse.wonLevel,
openAIErrorMessage: openAiReply.chatResponse.openAIErrorMessage,
defenceReport,
};
}
})
: openAiReply.chatHistory;

const updatedChatResponse: ChatHttpResponse = {
...chatResponse,
defenceReport: combinedDefenceReport,
openAIErrorMessage: openAiReply.chatResponse.openAIErrorMessage,
reply: !combinedDefenceReport.isBlocked && botReply ? botReply : '',
transformedMessage: transformedMessage ?? undefined,
wonLevel:
openAiReply.chatResponse.wonLevel && !combinedDefenceReport.isBlocked,
};
return {
chatResponse: updatedChatResponse,
chatHistory: updatedChatHistory,
sentEmails: openAiReply.sentEmails,
sentEmails: combinedDefenceReport.isBlocked ? [] : openAiReply.sentEmails,
};
}

Expand All @@ -149,7 +194,7 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
const initChatResponse: ChatHttpResponse = {
reply: '',
defenceReport: {
blockedReason: '',
blockedReason: null,
isBlocked: false,
alertedDefences: [],
triggeredDefences: [],
Expand Down Expand Up @@ -184,8 +229,6 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
const totalSentEmails: EmailInfo[] = [
...req.session.levelState[currentLevel].sentEmails,
];
// keep track of the number of sent emails
const numSentEmails = totalSentEmails.length;

// use default model for levels, allow user to select in sandbox
const chatModel =
Expand All @@ -208,8 +251,7 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
currentLevel,
chatModel,
currentChatHistory,
defences,
totalSentEmails
defences
);
} else {
// apply the defence detection for level 3 and sandbox
Expand All @@ -219,8 +261,7 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
currentLevel,
chatModel,
currentChatHistory,
defences,
totalSentEmails
defences
);
}
} catch (error) {
Expand All @@ -234,7 +275,7 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
return;
}

const updatedChatHistory = levelResult.chatHistory;
let updatedChatHistory = levelResult.chatHistory;
totalSentEmails.push(...levelResult.sentEmails);

// update chat response
Expand All @@ -243,22 +284,19 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
reply: levelResult.chatResponse.reply,
wonLevel: levelResult.chatResponse.wonLevel,
openAIErrorMessage: levelResult.chatResponse.openAIErrorMessage,
sentEmails: levelResult.sentEmails.slice(numSentEmails),
sentEmails: levelResult.sentEmails,
defenceReport: levelResult.chatResponse.defenceReport,
transformedMessage: levelResult.chatResponse.transformedMessage,
};

if (updatedChatResponse.defenceReport.isBlocked) {
// chatReponse.reply is empty if blocked
updatedChatHistory.push({
updatedChatHistory = pushMessageToHistory(updatedChatHistory, {
completion: null,
chatMessageType: CHAT_MESSAGE_TYPE.BOT_BLOCKED,
infoMessage: updatedChatResponse.defenceReport.blockedReason,
});
}

// more error handling
else if (updatedChatResponse.openAIErrorMessage) {
} else if (updatedChatResponse.openAIErrorMessage) {
const errorMsg = simplifyOpenAIErrorMessage(
updatedChatResponse.openAIErrorMessage
);
Expand All @@ -276,6 +314,15 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
);
handleChatError(res, updatedChatResponse, errorMsg, 500);
return;
} else {
// add bot message to chat history
updatedChatHistory = pushMessageToHistory(updatedChatHistory, {
completion: {
role: 'assistant',
content: updatedChatResponse.reply,
},
chatMessageType: CHAT_MESSAGE_TYPE.BOT,
});
}

// update state
Expand Down Expand Up @@ -303,16 +350,12 @@ function addErrorToChatHistory(
chatHistory: ChatHistoryMessage[],
errorMessage: string
): ChatHistoryMessage[] {
const updatedChatHistory = [
...chatHistory,
{
completion: null,
chatMessageType: CHAT_MESSAGE_TYPE.ERROR_MSG,
infoMessage: errorMessage,
},
];
console.error(errorMessage);
return updatedChatHistory;
return pushMessageToHistory(chatHistory, {
completion: null,
chatMessageType: CHAT_MESSAGE_TYPE.ERROR_MSG,
infoMessage: errorMessage,
});
}

function handleGetChatHistory(req: OpenAiGetHistoryRequest, res: Response) {
Expand All @@ -335,14 +378,14 @@ function handleAddToChatHistory(req: OpenAiAddHistoryRequest, res: Response) {
level !== undefined &&
level >= LEVEL_NAMES.LEVEL_1
) {
req.session.levelState[level].chatHistory = [
...req.session.levelState[level].chatHistory,
req.session.levelState[level].chatHistory = pushMessageToHistory(
req.session.levelState[level].chatHistory,
{
completion: null,
chatMessageType,
infoMessage,
},
];
}
);
res.send();
} else {
res.status(400);
Expand Down
Loading

0 comments on commit b2f1a42

Please sign in to comment.