start_roleplay.py — forked from Maximilian-Winter/AIRoleplay (fork 0)
147 lines (132 loc), 5.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
from dataclasses import dataclass

from llama_cpp import Llama

from ai_roleplay import AICharacter, load_personality
from ai_roleplay.default_commands import command_registry
@dataclass
class LlamaSettings:
    """Configuration bundle for constructing a ``llama_cpp.Llama`` model.

    The original version was a bare class used as a mutable attribute bag;
    a dataclass gives each instance its own fields (instead of shadowing
    class attributes), a useful ``__repr__`` for debugging, and keeps the
    exact same field names and defaults, so existing attribute-assignment
    call sites are unaffected.
    """

    model: str = ""                   # path to the GGML model file
    n_ctx: int = 4096                 # context window size in tokens
    n_batch: int = 128                # prompt-processing batch size
    n_threads: int = 4                # CPU threads used for inference
    f16_kv: bool = True               # half-precision KV cache
    use_mlock: bool = True            # pin model memory (avoid swapping)
    embedding: bool = False           # expose embedding endpoint
    last_n_tokens_size: int = 64      # window for repeat-penalty lookback
    n_gpu_layers: int = 0             # layers offloaded to GPU
    verbose: bool = False             # llama.cpp debug logging
# --- main (roleplay) model configuration --------------------------------
settings = LlamaSettings()
vars(settings).update(
    model="../../ggml-v3-models/WizardLM-Uncensored-SuperCOT-Storytelling.ggmlv3.q4_0.bin",
    n_batch=768,
    n_gpu_layers=7,
    n_threads=12,
    last_n_tokens_size=2048,
    verbose=False,
    embedding=False,
)

# --- summarizer model configuration (smaller, cheaper model) ------------
settings_summarizer = LlamaSettings()
vars(settings_summarizer).update(
    model="../../ggml-v3-models/wizardLM-13B-Uncensored.ggmlv3.q6_K.bin",
    n_batch=512,
    n_gpu_layers=3,
    n_threads=12,
    last_n_tokens_size=2048,
    embedding=False,
    verbose=False,
)
def _load_llama(cfg, n_ctx):
    """Build a ``Llama`` instance from a LlamaSettings bundle.

    ``n_ctx`` is passed explicitly because the two models deliberately use
    different context sizes.
    """
    return Llama(
        cfg.model,
        n_gpu_layers=cfg.n_gpu_layers,
        f16_kv=cfg.f16_kv,
        use_mlock=cfg.use_mlock,
        embedding=cfg.embedding,
        n_threads=cfg.n_threads,
        n_batch=cfg.n_batch,
        n_ctx=n_ctx,
        last_n_tokens_size=cfg.last_n_tokens_size,
        verbose=cfg.verbose,
    )


# Roleplay model gets its configured 4096-token context window.
main_model = _load_llama(settings, settings.n_ctx)
# Summarizer context is capped at 2048 tokens (matches the original script).
summarizer_model = _load_llama(settings_summarizer, 2048)
def main_generate_function(prompt: str = "", max_tokens: int = 500, temperature: float = 0.7,
                           top_k: int = 0, top_p: float = 0.5, repeat_penalty: float = 1.2, stream: bool = True):
    """Generate a roleplay reply from the main model.

    Streams tokens to stdout as they arrive (when ``stream`` is True) and
    returns the full completion text.

    :param prompt: fully rendered prompt fed to the model
    :param max_tokens: generation cap in tokens
    :param temperature: sampling temperature
    :param top_k: top-k sampling cutoff (0 disables)
    :param top_p: nucleus sampling threshold
    :param repeat_penalty: penalty for repeated tokens
    :param stream: yield tokens incrementally instead of one final dict
    :return: the generated text
    """
    if character.debug_output:
        print(prompt)
    result = main_model(
        f"{prompt}",
        max_tokens=max_tokens,
        stream=stream,
        # Stop sequences keep the model from speaking for the user or
        # leaking prompt-template markers into the reply.
        stop=['```python', 'Input:', 'Response:', f'{character.user_name}:', '</conversation>', '###',
              'Additional context:'],
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        mirostat_mode=0,
        mirostat_tau=5.0,
        mirostat_eta=0.1,
        repeat_penalty=repeat_penalty,
        tfs_z=0.97
    )
    if character.debug_output:
        print(result)
    # BUG FIX: the original iterated `result` unconditionally. That only
    # works when stream=True (a generator of chunks); with stream=False
    # llama-cpp-python returns a single completion dict, and iterating it
    # yields its string keys, crashing on out['choices'].
    if stream:
        output = ""
        for chunk in result:
            piece = chunk['choices'][0]['text']
            output += piece
            print(piece, end="", flush=True)
        print("")
        return output
    output = result['choices'][0]['text']
    print(output)
    return output
def summarizer_generate_function(prompt: str = "", max_tokens: int = 200, temperature: float = 0.1,
                                 top_k: int = 40, top_p: float = 0.9, repeat_penalty: float = 1.2,
                                 stream: bool = True):
    """Generate a memory/conversation summary from the summarizer model.

    Streams tokens to stdout as they arrive (when ``stream`` is True) and
    returns the full completion text.

    :param prompt: fully rendered summarization prompt
    :param max_tokens: generation cap in tokens
    :param temperature: sampling temperature (low — summaries should be stable)
    :param top_k: top-k sampling cutoff
    :param top_p: nucleus sampling threshold
    :param repeat_penalty: penalty for repeated tokens
    :param stream: yield tokens incrementally instead of one final dict
    :return: the generated text
    """
    if character.debug_output:
        print(prompt)
    result = summarizer_model(
        f"{prompt}",
        max_tokens=max_tokens,
        stream=stream,
        # Stop before the model starts a new conversational turn or leaks
        # prompt-template markers into the summary.
        stop=['User:', f'{character.user_name}:', '</conversation>', '###', 'Additional context:'],
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        mirostat_mode=0,
        mirostat_tau=5.0,
        mirostat_eta=0.1,
        repeat_penalty=repeat_penalty,
    )
    if character.debug_output:
        print(result)
    # BUG FIX: same as main_generate_function — with stream=False the
    # result is a single completion dict, not a chunk generator, so the
    # original unconditional loop crashed on out['choices'].
    if stream:
        output = ""
        for chunk in result:
            piece = chunk['choices'][0]['text']
            output += piece
            print(piece, end="", flush=True)
        print("")
        return output
    output = result['choices'][0]['text']
    print(output)
    return output
# Load the character personality and wire both models into an AICharacter,
# then run the interactive chat loop.
description, scenario, feelings, goals, save_directory, location, username, character_name = load_personality('Personalities/RichardFeynman.txt')
character = AICharacter(main_generate_function=main_generate_function,
                        summarizer_generate_function=summarizer_generate_function,
                        tokenizer_encode_function=main_model.tokenizer().encode,
                        character_name=character_name, user_name=username,
                        system_message="Adopt the personality described in the character section from the user and respond to the user's last message in the conversation history. Consider the user provided scenario, location, character's feelings, character's goals, character's memories and conversation history, when writing a response. Ensure that the response is coherent and in character.",
                        scenario=scenario,
                        location=location,
                        emotional_state=feelings,
                        save_dir=save_directory,
                        character_description=description,
                        objectives=goals,
                        chat_template_filename="PromptTemplates/chat_instruction.txt",
                        rate_memory_importance_template_filename="PromptTemplates/rate_memory.txt",
                        summarizer_template_filename="PromptTemplates/summary.txt",
                        command_registry=command_registry,
                        max_output_length=500, max_context_size=settings.n_ctx, manual_summarize=False,
                        debug_output=True)
character.init_chat()
while True:
    try:
        user_input = input(">")
    except (EOFError, KeyboardInterrupt):
        # ROBUSTNESS: exit cleanly on Ctrl-D / Ctrl-C instead of dying with
        # a traceback; the character state was already saved after the
        # previous turn.
        break
    character.conversation(user_input)
    # Persist after every turn so a crash loses at most one exchange.
    character.save_bot()