diff --git a/illufly/__version__.py b/illufly/__version__.py index 189be817..d6599b22 100644 --- a/illufly/__version__.py +++ b/illufly/__version__.py @@ -1 +1 @@ -__version__ = 'v0.2.0' \ No newline at end of file +__version__ = 'v0.2.1' \ No newline at end of file diff --git a/illufly/importer/local_files.py b/illufly/importer/local_files.py deleted file mode 100644 index 50e4bc9b..00000000 --- a/illufly/importer/local_files.py +++ /dev/null @@ -1,171 +0,0 @@ -from typing import Iterator, List, Union -from langchain_core.documents import Document -from langchain_community.document_loaders import ( - TextLoader, - PyPDFLoader, - Docx2txtLoader, - UnstructuredMarkdownLoader, -) -from langchain_core.embeddings import Embeddings -from langchain_community.document_loaders.base import BaseLoader -from langchain_text_splitters import TextSplitter - -from ..config import get_folder_root, get_env -from ..utils import raise_not_install, hash_text, clean_filename -from ..project import is_project_existing, BaseProject -from ..writing import MarkdownLoader -from ..io import TextBlock - -from .qa_excel import QAExcelsLoader - -import os -import re -import sys -import pickle -# import subprocess - -def collect_docs(docs: List[str]) -> str: - """ - 如果 Document 中包含的 metadata['answer'] 属性就优先采纳。 - """ - return "\n-----------------------------------\n".join([ - d.page_content + "\n" + d.metadata['answer'] if 'answer' in d.metadata else d.page_content - for d in docs - ]) - -def get_file_extension(filename: str) -> str: - """Get File Extension""" - return filename.split(".")[-1].lower() - -class FileLoadFactory: - @staticmethod - def get_loader(filename): - ext = get_file_extension(filename) - if ext == "md": - return MarkdownLoader(filename) - elif ext == "xlsx": - return QAExcelsLoader(filename) - elif ext == "pdf": - try: - import pypdf - return PyPDFLoader(filename) - except BaseException as e: - raise_not_install('pypdf') - elif ext == "docx": - try: - import docx2txt - return Docx2txtLoader(filename) - except BaseException as e: - raise_not_install('docx2txt') - elif ext == "txt": - return TextLoader(filename, autodetect_encoding=True) - else: - info = f"WARNING: Loaded File extension {ext} not supported now." - print(get_warn_color() + info + "\033[0m") - - return None - -class LocalFilesLoader(BaseLoader): - """ - 从本地文件中检索知识文档,支持docx、pdf、txt、md、xlsx等文档。 - - 文档位置: - - 加载文档的位置由 {docs_folders} 指定,允许用列表指定多个(没有指定就选用 {ILLUFLY_DOCS} 环境变量) - - {docs_folders} 应当描述为相对于 {base_folder} 的相对位置 - - 文本嵌入的缓存 {cache_folder} 默认是 {base_folder},也可以专门指定 - - 过滤规则包含: - - 按目录开头过滤:由 included_prefixes 指定,以列表中的字符串开头就保留 - - 按目录开头排除:由 excluded_prefixes 指定,以列表中的字符串开头就排除 - - 按路径正则匹配:由 path_regex 指定,应当是正则表达式,通常作为文件的过滤规则使用 - - 按扩展名过滤文件:由 extensions 指定,默认为 ["docx", "pdf", "md", "txt", "xlsx"] - """ - - def __init__( - self, - docs_folders: Union[str, List[str]]=None, - cache_folder: str=None, - path_regex: str=None, - included_prefixes: List[str] = [], - excluded_prefixes: List[str] = [], - extensions: List[str] = [], - base_folder: str=None, - text_spliter: TextSplitter=None, - *args, **kwargs - ): - if isinstance(docs_folders, str): - self.docs_folders = [docs_folders] - elif isinstance(docs_folders, list): - self.docs_folders = docs_folders - elif docs_folders == None: - self.docs_folders = [get_env("ILLUFLY_DOCS")] - else: - raise(ValueError("base_folder: MUST be str or list[str]: ", base_folder)) - - self.base_folder = base_folder or get_folder_root() - self.cache_folder = cache_folder or self.base_folder - - self.path_regex = path_regex or ".*" - self.included_prefixes = included_prefixes - self.excluded_prefixes = excluded_prefixes - self.extensions = extensions or ["docx", "pdf", "md", "txt", "xlsx"] - - self.text_spliter = text_spliter - - def get_files(self) -> list[str]: - """ - 按照规则设定过滤本地资料文件。 - """ - files = [] - - documents_folders = [os.path.join(self.base_folder, folder) for folder in self.docs_folders] - for folder in documents_folders: - if is_project_existing(folder): - project = BaseProject(folder, self.base_folder) - files.extend(list(project.embedding_files)) - else: - for dirpath, dirnames, filenames in os.walk(folder): - for filename in filenames: - relpath = os.path.relpath(os.path.join(dirpath, filename), folder) - if relpath.startswith(".") or re.search('/.', relpath): - # 确保不包含以.开头的文件夹或文件 - continue - if self.included_prefixes and not any(relpath.startswith(include) for include in self.included_prefixes): - continue - if self.excluded_prefixes and any(relpath.startswith(exclude) for exclude in self.excluded_prefixes): - continue - if self.path_regex and not re.search(self.path_regex, relpath): - continue - if self.extensions and get_file_extension(filename) not in self.extensions: - continue - files.append(os.path.join(dirpath, filename)) - - return files - - def load_docs(self, filename: str) -> List[Document]: - """ - 按照文档类型加载文档,并直输出循环拆分后的文档块。 - """ - file_loader = FileLoadFactory.get_loader(filename) - if file_loader: - return file_loader.load_and_split(self.text_spliter) - else: - return [] - - def lazy_load(self) -> Iterator[Document]: - """ - 为每个文件重新分配块结构。 - """ - for filename in self.get_files(): - file_docs = self.load_docs(filename) - for doc in file_docs: - yield doc - - def load(self) -> List[Document]: - """ - 如果直接使用这个方法,将会直接调用load_docs方法。 - - 默认的load_docs方法会将文档做整体切分,然后直接输出。 - 这不是最优的RAG处理方式,但足够简单。 - """ - return list(self.lazy_load()) \ No newline at end of file diff --git a/illufly/knowledge/__init__.py b/illufly/knowledge/__init__.py index c5fda15b..e69de29b 100644 --- a/illufly/knowledge/__init__.py +++ b/illufly/knowledge/__init__.py @@ -1,2 +0,0 @@ -from .local_files import LocalFilesLoader, collect_docs -from .qa_excel import QAExcelsLoader diff --git a/illufly/knowledge/base.py b/illufly/knowledge/base.py deleted file mode 100644 index 0ec3d16f..00000000 --- a/illufly/knowledge/base.py +++ /dev/null @@ -1,9 +0,0 @@ -def load_docs(filename: str) -> List[Document]: - """ - 按照文档类型加载文档,并直输出循环拆分后的文档块。 - """ - file_loader = FileLoadFactory.get_loader(filename) - if file_loader: - return file_loader.load_and_split(self.text_spliter) - else: - return [] \ No newline at end of file diff --git a/illufly/knowledge/local_files.py b/illufly/knowledge/local_files.py deleted file mode 100644 index 2610c5db..00000000 --- a/illufly/knowledge/local_files.py +++ /dev/null @@ -1,254 +0,0 @@ -from typing import Iterator, List, Union -from langchain_core.documents import Document -from langchain_community.document_loaders import ( - TextLoader, - PyPDFLoader, - Docx2txtLoader, - UnstructuredMarkdownLoader, -) -from langchain_core.embeddings import Embeddings -from langchain_community.document_loaders.base import BaseLoader -from langchain_text_splitters import TextSplitter - -from ..config import get_folder_root, get_env -from ..utils import raise_not_install, hash_text, clean_filename -from ..project import is_project_existing, BaseProject -from ..writing import MarkdownLoader -from ..io import TextBlock - -from .qa_excel import QAExcelsLoader - -import os -import re -import sys -import pickle -# import subprocess - -def collect_docs(docs: List[str]) -> str: - """ - 如果 Document 中包含的 metadata['answer'] 属性就优先采纳。 - """ - return "\n-----------------------------------\n".join([ - d.page_content + "\n" + d.metadata['answer'] if 'answer' in d.metadata else d.page_content - for d in docs - ]) - -def get_file_extension(filename: str) -> str: - """Get File Extension""" - return filename.split(".")[-1].lower() - -class FileLoadFactory: - @staticmethod - def get_loader(filename): - ext = get_file_extension(filename) - if ext == "md": - return MarkdownLoader(filename) - elif ext == "xlsx": - return QAExcelsLoader(filename) - elif ext == "pdf": - try: - import pypdf - return PyPDFLoader(filename) - except BaseException as e: - raise_not_install('pypdf') - elif ext == "docx": - try: - import docx2txt - return Docx2txtLoader(filename) - except BaseException as e: - raise_not_install('docx2txt') - elif ext == "txt": - return TextLoader(filename, autodetect_encoding=True) - else: - info = f"WARNING: Loaded File extension {ext} not supported now." - print(get_warn_color() + info + "\033[0m") - - return None - -class LocalFilesLoader(BaseLoader): - """ - 从本地文件中检索知识文档,支持docx、pdf、txt、md、xlsx等文档。 - - 文档位置: - - 加载文档的位置由 {docs_folders} 指定,允许用列表指定多个(没有指定就选用 {ILLUFLY_DOCS} 环境变量) - - {docs_folders} 应当描述为相对于 {base_folder} 的相对位置 - - 文本嵌入的缓存 {cache_folder} 默认是 {base_folder},也可以专门指定 - - 过滤规则包含: - - 按目录开头过滤:由 included_prefixes 指定,以列表中的字符串开头就保留 - - 按目录开头排除:由 excluded_prefixes 指定,以列表中的字符串开头就排除 - - 按路径正则匹配:由 path_regex 指定,应当是正则表达式,通常作为文件的过滤规则使用 - - 按扩展名过滤文件:由 extensions 指定,默认为 ["docx", "pdf", "md", "txt", "xlsx"] - """ - - def __init__( - self, - docs_folders: Union[str, List[str]]=None, - cache_folder: str=None, - path_regex: str=None, - included_prefixes: List[str] = [], - excluded_prefixes: List[str] = [], - extensions: List[str] = [], - base_folder: str=None, - text_spliter: TextSplitter=None, - *args, **kwargs - ): - if isinstance(docs_folders, str): - self.docs_folders = [docs_folders] - elif isinstance(docs_folders, list): - self.docs_folders = docs_folders - elif docs_folders == None: - self.docs_folders = [get_env("ILLUFLY_DOCS")] - else: - raise(ValueError("base_folder: MUST be str or list[str]: ", base_folder)) - - self.base_folder = base_folder or get_folder_root() - self.cache_folder = cache_folder or self.base_folder - - self.path_regex = path_regex or ".*" - self.included_prefixes = included_prefixes - self.excluded_prefixes = excluded_prefixes - self.extensions = extensions or ["docx", "pdf", "md", "txt", "xlsx"] - - self.text_spliter = text_spliter - - def get_files(self) -> list[str]: - """ - 按照规则设定过滤本地资料文件。 - """ - files = [] - - documents_folders = [os.path.join(self.base_folder, folder) for folder in self.docs_folders] - for folder in documents_folders: - if is_project_existing(folder): - project = BaseProject(folder, self.base_folder) - files.extend(list(project.embedding_files)) - else: - for dirpath, dirnames, filenames in os.walk(folder): - for filename in filenames: - relpath = os.path.relpath(os.path.join(dirpath, filename), folder) - if relpath.startswith(".") or re.search('/.', relpath): - # 确保不包含以.开头的文件夹或文件 - continue - if self.included_prefixes and not any(relpath.startswith(include) for include in self.included_prefixes): - continue - if self.excluded_prefixes and any(relpath.startswith(exclude) for exclude in self.excluded_prefixes): - continue - if self.path_regex and not re.search(self.path_regex, relpath): - continue - if self.extensions and get_file_extension(filename) not in self.extensions: - continue - files.append(os.path.join(dirpath, filename)) - - return files - - def load_docs(self, filename: str) -> List[Document]: - """ - 按照文档类型加载文档,并直输出循环拆分后的文档块。 - """ - file_loader = FileLoadFactory.get_loader(filename) - if file_loader: - return file_loader.load_and_split(self.text_spliter) - else: - return [] - - def lazy_load(self) -> Iterator[Document]: - """ - 为每个文件重新分配块结构。 - """ - for filename in self.get_files(): - file_docs = self.load_docs(filename) - for doc in file_docs: - yield doc - - def load(self) -> List[Document]: - """ - 如果直接使用这个方法,将会直接调用load_docs方法。 - - 默认的load_docs方法会将文档做整体切分,然后直接输出。 - 这不是最优的RAG处理方式,但足够简单。 - """ - return list(self.lazy_load()) - - def cache_embeddings(self, model: Embeddings, tag_name: str=None): - """ - 缓存文本嵌入。 - - tag_name 支持按不同模型厂商或模型名称缓存到子目录。 - """ - tag_name = tag_name or '' - cache_folder = get_env("ILLUFLY_CACHE_EMBEDDINGS") - vector_folder = os.path.join(self.cache_folder, cache_folder, tag_name) - - to_embedding_texts = [] - to_embedding_paths = [] - - docs = self.load() - all_docs = [ - ( - d.page_content, - (clean_filename(d.metadata['source']) if 'source' in d.metadata else '') - ) - for d - in docs - ] - - for text, source in all_docs: - vector_path = hash_text(text) + ".emb" - cache_path = os.path.join(vector_folder, source, vector_path) - if not os.path.exists(cache_path): - to_embedding_texts.append(text) - to_embedding_paths.append(cache_path) - - if to_embedding_texts and len(to_embedding_texts) == len(to_embedding_paths): - vectors = model.embed_documents(to_embedding_texts) - for cache_path, text, data in list(zip(to_embedding_paths, to_embedding_texts, vectors)): - os.makedirs(os.path.dirname(cache_path), exist_ok=True) - with open(cache_path, 'wb') as f: - pickle.dump(data, f) - chunk = TextChunk('info', f'<{source}> {text[0:50]}{"..." if len(text) > 50 else ""}') - print(chunk.text_with_print_color) - - chunk = TextChunk('info', f'Cached {len(to_embedding_paths)} embeddings to {vector_folder} !') - print(chunk.text_with_print_color) - return True - - return False - - def load_embeddings(self, model: Embeddings=None, tag_name: str=None): - """ - 缓存文本嵌入。 - """ - tag_name = tag_name or '' - cache_folder = get_env("ILLUFLY_CACHE_EMBEDDINGS") - vector_folder = os.path.join(self.cache_folder, cache_folder, tag_name) - - texts = [] - vectors = [] - metadata_list = [] - to_embedding_paths = [] - - docs = self.load() - all_docs = [ - ( - d.page_content, - (clean_filename(d.metadata['source']) if 'source' in d.metadata else ''), - d.metadata - ) - for d - in docs - ] - - for text, source, metadata in all_docs: - vector_path = hash_text(text) + ".emb" - cache_path = os.path.join(vector_folder, source, vector_path) - if os.path.exists(cache_path): - with open(cache_path, 'rb') as f: - texts.append(text) - vectors.append(pickle.load(f)) - metadata_list.append(metadata) - else: - chunk = TextChunk('warn', f'No embeddings cache found for: <{source}> {text[0:50]}{"..." if len(text) > 50 else ""}') - print(chunk.text_with_print_color) - - return list(zip(texts, vectors)), model, metadata_list diff --git a/illufly/knowledge/qa_excel.py b/illufly/knowledge/qa_excel.py deleted file mode 100644 index d2d879b9..00000000 --- a/illufly/knowledge/qa_excel.py +++ /dev/null @@ -1,89 +0,0 @@ -from typing import Iterator, List, Union, Optional -from langchain_core.documents import Document -from langchain_community.document_loaders.base import BaseLoader -from langchain_text_splitters import TextSplitter - -import pandas as pd - -class QAExcelsLoader(BaseLoader): - """ - 从本地文件中检索Excel文件,并以QA结构返回 Document 对象。 - """ - - def __init__(self, filename: str=None): - self.filename = filename - - def lazy_load(self) -> Iterator[Document]: - for doc in self.load_docs(): - yield doc - - def load(self) -> List[Document]: - return list(self.lazy_load()) - - def load_and_split( - self, text_splitter: Optional[TextSplitter] = None - ) -> List[Document]: - return self.load() - - def detect_df(self, filename: str) -> tuple: - """ - 检测包含QA标记的数据框 - - 规则: - - 文件名称中包含QA的sheet - - 以数据框的形式保存整个QA文本 - - 表头应当包含QA列,即至少一列以Q开头,至少一列以A开头,例如: Q-问题 | A-回答 - - 允许表头的上方有其他行,用于说明、总结或作为空行 - """ - result = [] - - with pd.ExcelFile(filename) as xls: - sheet_names = xls.sheet_names - - target_sheets = [name for name in sheet_names if "qa" in name.lower()] - - for sheet_name in target_sheets: - df = pd.read_excel(filename, sheet_name=sheet_name, header=None, nrows=10) - - for i in range(10): - # 找到列名称以"q"或"Q"开头且包含"A"或"a"的列 - q_columns = [col for col in df.iloc[i] if str(col)[0].lower() == "q"] - a_columns = [col for col in df.iloc[i] if str(col)[0].lower() == "a"] - - if q_columns and a_columns: - result.append((filename, sheet_name, i, q_columns, a_columns)) - break - - return result - - def load_docs(self) -> List[Document]: - """ - Load documents from the specified Excel file. - """ - dfs = self.detect_df(self.filename) - documents = [] - - for file_name, sheet_name, start_row, q_columns, a_columns in dfs: - - df = pd.read_excel(file_name, sheet_name=sheet_name, header=start_row) - - df_q = df[q_columns].copy() - df_a = df[a_columns].copy() - - df_q.loc[:, 'Q'] = df_q.apply(lambda row: '\n'.join(row.values.astype(str)), axis=1) - df_a.loc[:, 'A'] = df_a.apply(lambda row: '\n'.join(row.values.astype(str)), axis=1) - - df_final = pd.concat([df_q['Q'], df_a['A']], axis=1) - - for _, row in df_final.iterrows(): - doc = Document( - page_content=row["Q"], - metadata={ - "answer": row["A"], - "source": file_name, - "sheet": sheet_name, - } - ) - documents.append(doc) - - return documents diff --git a/notes/community/chat.ipynb b/notes/community/chat.ipynb index 2ae93178..46cb8994 100644 --- a/notes/community/chat.ipynb +++ b/notes/community/chat.ipynb @@ -34,17 +34,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m小\u001b[0m\u001b[32m兔\u001b[0m\u001b[32m子\u001b[0m\u001b[32m做\u001b[0m\u001b[32m梦\u001b[0m\u001b[32m\n", - "\u001b[0m\u001b[32m梦\u001b[0m\u001b[32m到\u001b[0m\u001b[32m吃\u001b[0m\u001b[32m胡\u001b[0m\u001b[32m萝\u001b[0m\u001b[32m卜\u001b[0m\u001b[32m\n", - "\u001b[0m\u001b[32m睡\u001b[0m\u001b[32m梦\u001b[0m\u001b[32m中\u001b[0m\u001b[32m咯\u001b[0m\u001b[32m咯\u001b[0m\u001b[32m笑\u001b[0m\u001b[32m\n", - "\u001b[0m\u001b[32m快\u001b[0m\u001b[32m乐\u001b[0m\u001b[32m又\u001b[0m\u001b[32m开\u001b[0m\u001b[32m心\u001b[0m\n", + "\u001b[32m兔\u001b[0m\u001b[32m子\u001b[0m\u001b[32m小\u001b[0m\u001b[32m兔\u001b[0m\u001b[32m兔\u001b[0m\u001b[32m,\u001b[0m\u001b[32m快\u001b[0m\u001b[32m去\u001b[0m\u001b[32m做\u001b[0m\u001b[32m梦\u001b[0m\u001b[32m乡\u001b[0m\u001b[32m。\n", + "\u001b[0m\u001b[32m梦\u001b[0m\u001b[32m里\u001b[0m\u001b[32m有\u001b[0m\u001b[32m田\u001b[0m\u001b[32m野\u001b[0m\u001b[32m绿\u001b[0m\u001b[32m,\u001b[0m\u001b[32m和\u001b[0m\u001b[32m翩\u001b[0m\u001b[32m跹\u001b[0m\u001b[32m舞\u001b[0m\u001b[32m动\u001b[0m\u001b[32m身\u001b[0m\u001b[32m。\n", + "\u001b[0m\u001b[32m追\u001b[0m\u001b[32m逐\u001b[0m\u001b[32m彩\u001b[0m\u001b[32m虹\u001b[0m\u001b[32m云\u001b[0m\u001b[32m,\u001b[0m\u001b[32m甜\u001b[0m\u001b[32m梦\u001b[0m\u001b[32m会\u001b[0m\u001b[32m永\u001b[0m\u001b[32m恒\u001b[0m\u001b[32m。\n", + "\u001b[0m\u001b[32m兔\u001b[0m\u001b[32m子\u001b[0m\u001b[32m小\u001b[0m\u001b[32m兔\u001b[0m\u001b[32m兔\u001b[0m\u001b[32m,\u001b[0m\u001b[32m愿\u001b[0m\u001b[32m你\u001b[0m\u001b[32m梦\u001b[0m\u001b[32m境\u001b[0m\u001b[32m美\u001b[0m\u001b[32m无\u001b[0m\u001b[32m边\u001b[0m\u001b[32m。\u001b[0m\n", "\n" ] }, { "data": { "text/plain": [ - "'小兔子做梦\\n梦到吃胡萝卜\\n睡梦中咯咯笑\\n快乐又开心'" + "'兔子小兔兔,快去做梦乡。\\n梦里有田野绿,和翩跹舞动身。\\n追逐彩虹云,甜梦会永恒。\\n兔子小兔兔,愿你梦境美无边。'" ] }, "execution_count": 1, @@ -196,40 +196,28 @@ "name": "stdout", "output_type": "stream", "text": [ - " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"call_vDgXP88Hd6OZBGlWO5sUiFkz\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"\"}}\u001b[0m\n", - " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"{\\n\"}}\u001b[0m\n", - " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \" \"}}\u001b[0m\n", - " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \" \\\"\"}}\u001b[0m\n", + " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"call_2ihvgzROsqaQxhKYYUdF9i91\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"\"}}\u001b[0m\n", + " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"{\\\"\"}}\u001b[0m\n", " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"location\"}}\u001b[0m\n", - " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"\\\":\"}}\u001b[0m\n", - " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \" \\\"\"}}\u001b[0m\n", - " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"广\"}}\u001b[0m\n", - " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"州\"}}\u001b[0m\n", - " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"\\\"\\n\"}}\u001b[0m\n", - " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"}\"}}\u001b[0m\n", - " 1s [TOOLS_CALL_FINAL] \u001b[36m{\"0\": {\"index\": 0, \"id\": \"call_vDgXP88Hd6OZBGlWO5sUiFkz\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"{\\n \\\"location\\\": \\\"广州\\\"\\n}\"}}}\u001b[0m\n", - "\u001b[32m广州今天是晴天。 \u001b[0m\n", - "\n", - "\u001b[32m今天\u001b[0m\u001b[32m广州\u001b[0m\u001b[32m是\u001b[0m\u001b[32m晴\u001b[0m\u001b[32m天\u001b[0m\u001b[32m,\u001b[0m\u001b[32m出\u001b[0m\u001b[32m门\u001b[0m\u001b[32m时\u001b[0m\u001b[32m建议\u001b[0m\u001b[32m准备\u001b[0m\u001b[32m:\n", - "\n", - "\u001b[0m\u001b[32m1\u001b[0m\u001b[32m.\u001b[0m\u001b[32m **\u001b[0m\u001b[32m防\u001b[0m\u001b[32m晒\u001b[0m\u001b[32m用品\u001b[0m\u001b[32m**\u001b[0m\u001b[32m:\u001b[0m\u001b[32m如\u001b[0m\u001b[32m防\u001b[0m\u001b[32m晒\u001b[0m\u001b[32m霜\u001b[0m\u001b[32m、\u001b[0m\u001b[32m太阳\u001b[0m\u001b[32m镜\u001b[0m\u001b[32m、\u001b[0m\u001b[32m帽\u001b[0m\u001b[32m子\u001b[0m\u001b[32m等\u001b[0m\u001b[32m。\n", - "\u001b[0m\u001b[32m2\u001b[0m\u001b[32m.\u001b[0m\u001b[32m **\u001b[0m\u001b[32m轻\u001b[0m\u001b[32m便\u001b[0m\u001b[32m的\u001b[0m\u001b[32m衣\u001b[0m\u001b[32m物\u001b[0m\u001b[32m**\u001b[0m\u001b[32m:\u001b[0m\u001b[32m建议\u001b[0m\u001b[32m穿\u001b[0m\u001b[32m着\u001b[0m\u001b[32m舒\u001b[0m\u001b[32m适\u001b[0m\u001b[32m、\u001b[0m\u001b[32m透\u001b[0m\u001b[32m气\u001b[0m\u001b[32m的\u001b[0m\u001b[32m衣\u001b[0m\u001b[32m服\u001b[0m\u001b[32m。\n", - "\u001b[0m\u001b[32m3\u001b[0m\u001b[32m.\u001b[0m\u001b[32m **\u001b[0m\u001b[32m水\u001b[0m\u001b[32m瓶\u001b[0m\u001b[32m**\u001b[0m\u001b[32m:\u001b[0m\u001b[32m多\u001b[0m\u001b[32m喝\u001b[0m\u001b[32m水\u001b[0m\u001b[32m,\u001b[0m\u001b[32m保持\u001b[0m\u001b[32m水\u001b[0m\u001b[32m分\u001b[0m\u001b[32m。\n", - "\u001b[0m\u001b[32m4\u001b[0m\u001b[32m.\u001b[0m\u001b[32m **\u001b[0m\u001b[32m轻\u001b[0m\u001b[32m便\u001b[0m\u001b[32m的\u001b[0m\u001b[32m鞋\u001b[0m\u001b[32m子\u001b[0m\u001b[32m**\u001b[0m\u001b[32m:\u001b[0m\u001b[32m适\u001b[0m\u001b[32m合\u001b[0m\u001b[32m行\u001b[0m\u001b[32m走\u001b[0m\u001b[32m的\u001b[0m\u001b[32m鞋\u001b[0m\u001b[32m子\u001b[0m\u001b[32m。\n", - "\u001b[0m\u001b[32m5\u001b[0m\u001b[32m.\u001b[0m\u001b[32m **\u001b[0m\u001b[32m随\u001b[0m\u001b[32m身\u001b[0m\u001b[32m小\u001b[0m\u001b[32m物\u001b[0m\u001b[32m**\u001b[0m\u001b[32m:\u001b[0m\u001b[32m如\u001b[0m\u001b[32m手机\u001b[0m\u001b[32m、\u001b[0m\u001b[32m钱包\u001b[0m\u001b[32m等\u001b[0m\u001b[32m日\u001b[0m\u001b[32m常\u001b[0m\u001b[32m必\u001b[0m\u001b[32m需\u001b[0m\u001b[32m品\u001b[0m\u001b[32m。\n", - "\n", - "\u001b[0m\u001b[32m祝\u001b[0m\u001b[32m你\u001b[0m\u001b[32m出\u001b[0m\u001b[32m行\u001b[0m\u001b[32m愉\u001b[0m\u001b[32m快\u001b[0m\u001b[32m!\u001b[0m\n", - "\n" + " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"\\\":\\\"\"}}\u001b[0m\n", + " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"Gu\"}}\u001b[0m\n", + " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"ang\"}}\u001b[0m\n", + " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"zhou\"}}\u001b[0m\n", + " 0s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"\\\"}\"}}\u001b[0m\n", + " 1s [TOOLS_CALL_FINAL] \u001b[36m{\"0\": {\"index\": 0, \"id\": \"call_2ihvgzROsqaQxhKYYUdF9i91\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"{\\\"location\\\":\\\"Guangzhou\\\"}\"}}}\u001b[0m\n" ] }, { "data": { "text/plain": [ "[{'role': 'user', 'content': '今天广州出门要准备什么'},\n", - " {'role': 'assistant', 'content': ''},\n", - " {'role': 'tool', 'content': '广州今天是晴天。 '},\n", " {'role': 'assistant',\n", - " 'content': '今天广州是晴天,出门时建议准备:\\n\\n1. **防晒用品**:如防晒霜、太阳镜、帽子等。\\n2. **轻便的衣物**:建议穿着舒适、透气的衣服。\\n3. **水瓶**:多喝水,保持水分。\\n4. **轻便的鞋子**:适合行走的鞋子。\\n5. **随身小物**:如手机、钱包等日常必需品。\\n\\n祝你出行愉快!'}]" + " 'content': '',\n", + " 'tool_calls': [{'index': 0,\n", + " 'id': 'call_2ihvgzROsqaQxhKYYUdF9i91',\n", + " 'type': 'function',\n", + " 'function': {'name': 'get_current_weather',\n", + " 'arguments': '{\"location\":\"Guangzhou\"}'}}]}]" ] }, "execution_count": 1, @@ -245,14 +233,14 @@ " \"\"\"获取城市的天气情况\"\"\"\n", " yield f\"{location}今天是晴天。 \"\n", " \n", - "a = ChatOpenAI(tools=[ToolAgent(get_current_weather)])\n", + "a = ChatOpenAI(tools=[ToolAgent(get_current_weather)], exec_tool=False)\n", "a(\"今天广州出门要准备什么\", new_chat=True, verbose=True)\n", "a.memory" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "id": "da1c3d70-917d-41aa-8490-0237f55c57ca", "metadata": {}, "outputs": [ @@ -260,16 +248,16 @@ "name": "stdout", "output_type": "stream", "text": [ - " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"call_9SJJNlXFZV5GQne0LkVevFtC\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"\"}}\u001b[0m\n", + " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"call_QEhleHKNZDbiAArF6MEmPazF\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"\"}}\u001b[0m\n", " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"{\\\"\"}}\u001b[0m\n", " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"location\"}}\u001b[0m\n", " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"\\\":\\\"\"}}\u001b[0m\n", " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"广州\"}}\u001b[0m\n", " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"name\": \"\", \"arguments\": \"\\\"}\"}}\u001b[0m\n", - " 1s [TOOLS_CALL_FINAL] \u001b[36m{\"0\": {\"index\": 0, \"id\": \"call_9SJJNlXFZV5GQne0LkVevFtC\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"{\\\"location\\\":\\\"广州\\\"}\"}}}\u001b[0m\n", + " 1s [TOOLS_CALL_FINAL] \u001b[36m{\"0\": {\"index\": 0, \"id\": \"call_QEhleHKNZDbiAArF6MEmPazF\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"{\\\"location\\\":\\\"广州\\\"}\"}}}\u001b[0m\n", "\u001b[32m广州今天是晴天, 25摄氏度。 \u001b[0m\n", "\n", - "\u001b[32m根\u001b[0m\u001b[32m据\u001b[0m\u001b[32m广\u001b[0m\u001b[32m州\u001b[0m\u001b[32m今\u001b[0m\u001b[32m天\u001b[0m\u001b[32m的\u001b[0m\u001b[32m天\u001b[0m\u001b[32m气\u001b[0m\u001b[32m情\u001b[0m\u001b[32m况\u001b[0m\u001b[32m,\u001b[0m\u001b[32m不\u001b[0m\u001b[32m需要\u001b[0m\u001b[32m穿\u001b[0m\u001b[32m羽\u001b[0m\u001b[32m绒\u001b[0m\u001b[32m服\u001b[0m\u001b[32m。\u001b[0m\u001b[32m今\u001b[0m\u001b[32m天\u001b[0m\u001b[32m是\u001b[0m\u001b[32m晴\u001b[0m\u001b[32m天\u001b[0m\u001b[32m,\u001b[0m\u001b[32m温\u001b[0m\u001b[32m度\u001b[0m\u001b[32m为\u001b[0m\u001b[32m25\u001b[0m\u001b[32m摄\u001b[0m\u001b[32m氏\u001b[0m\u001b[32m度\u001b[0m\u001b[32m,\u001b[0m\u001b[32m比\u001b[0m\u001b[32m较\u001b[0m\u001b[32m温\u001b[0m\u001b[32m暖\u001b[0m\u001b[32m。\u001b[0m\u001b[32m你\u001b[0m\u001b[32m可以\u001b[0m\u001b[32m选择\u001b[0m\u001b[32m适\u001b[0m\u001b[32m合\u001b[0m\u001b[32m室\u001b[0m\u001b[32m外\u001b[0m\u001b[32m活\u001b[0m\u001b[32m动\u001b[0m\u001b[32m的\u001b[0m\u001b[32m轻\u001b[0m\u001b[32m便\u001b[0m\u001b[32m衣\u001b[0m\u001b[32m物\u001b[0m\u001b[32m。\u001b[0m\n", + "\u001b[32m今\u001b[0m\u001b[32m天\u001b[0m\u001b[32m广\u001b[0m\u001b[32m州\u001b[0m\u001b[32m是\u001b[0m\u001b[32m晴\u001b[0m\u001b[32m天\u001b[0m\u001b[32m,\u001b[0m\u001b[32m气\u001b[0m\u001b[32m温\u001b[0m\u001b[32m为\u001b[0m\u001b[32m25\u001b[0m\u001b[32m摄\u001b[0m\u001b[32m氏\u001b[0m\u001b[32m度\u001b[0m\u001b[32m,\u001b[0m\u001b[32m不\u001b[0m\u001b[32m需要\u001b[0m\u001b[32m穿\u001b[0m\u001b[32m羽\u001b[0m\u001b[32m绒\u001b[0m\u001b[32m服\u001b[0m\u001b[32m哦\u001b[0m\u001b[32m。\u001b[0m\n", "\n" ] }, @@ -277,13 +265,21 @@ "data": { "text/plain": [ "[{'role': 'user', 'content': '今天广州出门要穿羽绒服吗?'},\n", - " {'role': 'assistant', 'content': ''},\n", - " {'role': 'tool', 'content': '广州今天是晴天, 25摄氏度。 '},\n", " {'role': 'assistant',\n", - " 'content': '根据广州今天的天气情况,不需要穿羽绒服。今天是晴天,温度为25摄氏度,比较温暖。你可以选择适合室外活动的轻便衣物。'}]" + " 'content': '',\n", + " 'tool_calls': [{'index': 0,\n", + " 'id': 'call_QEhleHKNZDbiAArF6MEmPazF',\n", + " 'type': 'function',\n", + " 'function': {'name': 'get_current_weather',\n", + " 'arguments': '{\"location\":\"广州\"}'}}]},\n", + " {'tool_call_id': 'call_QEhleHKNZDbiAArF6MEmPazF',\n", + " 'role': 'tool',\n", + " 'name': 'get_current_weather',\n", + " 'content': '广州今天是晴天, 25摄氏度。 '},\n", + " {'role': 'assistant', 'content': '今天广州是晴天,气温为25摄氏度,不需要穿羽绒服哦。'}]" ] }, - "execution_count": 3, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } diff --git a/notes/core/agent.ipynb b/notes/core/agent.ipynb index d9113b28..8aca0dfc 100644 --- a/notes/core/agent.ipynb +++ b/notes/core/agent.ipynb @@ -208,7 +208,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m当然\u001b[0m\u001b[32m,\u001b[0m\u001b[32m我已经\u001b[0m\u001b[32m准备好了。请告诉我\u001b[0m\u001b[32m您需要的帮助,比如创作故事、\u001b[0m\u001b[32m撰写文章、构思大纲、润色\u001b[0m\u001b[32m文字等具体任务,我将直接\u001b[0m\u001b[32m提供相应的markdown格式内容。\u001b[0m\u001b[32m\u001b[0m\n", + "new_chat\n", + "self.task 给我讲个笑话\n", + "\u001b[32m当然\u001b[0m\u001b[32m,\u001b[0m\u001b[32m这里\u001b[0m\u001b[32m有一个笑话供您欣赏\u001b[0m\u001b[32m:\n", + "\n", + "为什么袜子总是只丢一只\u001b[0m\u001b[32m?因为丢两只根本就不会发现。\u001b[0m\u001b[32m\u001b[0m\n", "\n" ] }, @@ -216,10 +220,10 @@ "data": { "text/plain": [ "[{'role': 'system',\n", - " 'content': '你是强大的写作助手。\\n\\n你必须遵循以下约束来完成任务:\\n1. 直接输出你的结果,不要评论,不要啰嗦\\n2. 使用markdown格式输出\\n\\n**你的任务是:**\\n\\n'},\n", + " 'content': '你是强大的写作助手。\\n\\n你必须遵循以下约束来完成任务:\\n1. 直接输出你的结果,不要评论,不要啰嗦\\n2. 使用markdown格式输出\\n\\n**你的任务是:**\\n给我讲个笑话\\n'},\n", " {'role': 'user', 'content': '请你开始'},\n", " {'role': 'assistant',\n", - " 'content': '当然,我已经准备好了。请告诉我您需要的帮助,比如创作故事、撰写文章、构思大纲、润色文字等具体任务,我将直接提供相应的markdown格式内容。'}]" + " 'content': '当然,这里有一个笑话供您欣赏:\\n\\n为什么袜子总是只丢一只?因为丢两只根本就不会发现。'}]" ] }, "execution_count": 2, @@ -233,6 +237,50 @@ "a.memory" ] }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fdd6c8fd-09b3-4627-8f9d-a3d7d4cf6e38", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'last_input': None,\n", + " 'last_output': '当然,这里有一个笑话供您欣赏:\\n\\n为什么袜子总是只丢一只?因为丢两只根本就不会发现。',\n", + " 'task': '给我讲个笑话'}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.exported_vars" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5ee3de44-fd1a-4ef3-b3c0-50cf1ae97e5d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'task'}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.bound_vars" + ] + }, { "cell_type": "code", "execution_count": 3, @@ -342,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "id": "cd7e0d2d-fb9d-4992-9045-3dc602fd346e", "metadata": {}, "outputs": [ @@ -350,8 +398,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m小\u001b[0m\u001b[32m兔\u001b[0m\u001b[32m乖乖\u001b[0m\u001b[32m梦中跳,月亮\u001b[0m\u001b[32m船上摇啊摇。\n", - "梦见萝卜比\u001b[0m\u001b[32m树高,醒来咯咯笑。\u001b[0m\u001b[32m\u001b[0m\n", + "\u001b[32m小\u001b[0m\u001b[32m兔子\u001b[0m\u001b[32m,\u001b[0m\u001b[32m梦中跳, \n", + "\u001b[0m\u001b[32m胡萝卜,满天空飘。 \n", + "月亮\u001b[0m\u001b[32m船,载它游, \n", + "梦\u001b[0m\u001b[32m里笑,乐悠悠。\u001b[0m\u001b[32m\u001b[0m\n", "\n" ] }, @@ -359,10 +409,11 @@ "data": { "text/plain": [ "[{'role': 'user', 'content': '你能帮我写一首关于兔子做梦的四句儿歌?'},\n", - " {'role': 'assistant', 'content': '小兔乖乖梦中跳,月亮船上摇啊摇。\\n梦见萝卜比树高,醒来咯咯笑。'}]" + " {'role': 'assistant',\n", + " 'content': '小兔子,梦中跳, \\n胡萝卜,满天空飘。 \\n月亮船,载它游, \\n梦里笑,乐悠悠。'}]" ] }, - "execution_count": 8, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -385,7 +436,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "8c70c16a-77a4-44f5-865a-b4dea7858496", "metadata": {}, "outputs": [ @@ -409,7 +460,7 @@ " 'content': '小白兔,蹦蹦跳, \\n耳朵长,尾巴小。 \\n爱吃萝卜和青菜, \\n快乐生活在林梢。'}]" ] }, - "execution_count": 1, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -456,10 +507,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m小白\u001b[0m\u001b[32m兔\u001b[0m\u001b[32m,\u001b[0m\u001b[32m真可爱, \n", - "耳朵\u001b[0m\u001b[32m长,跳得快。 \n", - "爱吃\u001b[0m\u001b[32m萝卜和青菜, \n", - "快乐生活在\u001b[0m\u001b[32m林间。\u001b[0m\u001b[32m\u001b[0m\n", + "\u001b[32m小\u001b[0m\u001b[32m兔子\u001b[0m\u001b[32m白\u001b[0m\u001b[32m又白,耳朵长长\u001b[0m\u001b[32m蹦蹦跳。\n", + "爱吃萝卜和青\u001b[0m\u001b[32m菜,月光下蹦跶真\u001b[0m\u001b[32m快乐。\u001b[0m\u001b[32m\u001b[0m\n", "\n" ] }, @@ -468,8 +517,7 @@ "text/plain": [ "[{'role': 'system', 'content': '你是一个专门写儿歌的作家,请根据我的提示写作。'},\n", " {'role': 'user', 'content': '来一首关于兔子的,四句'},\n", - " {'role': 'assistant',\n", - " 'content': '小白兔,真可爱, \\n耳朵长,跳得快。 \\n爱吃萝卜和青菜, \\n快乐生活在林间。'}]" + " {'role': 'assistant', 'content': '小兔子白又白,耳朵长长蹦蹦跳。\\n爱吃萝卜和青菜,月光下蹦跶真快乐。'}]" ] }, "execution_count": 4, @@ -488,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "5c17899e-88d4-4d34-a3fd-8ab54a6a1b27", "metadata": {}, "outputs": [ @@ -496,28 +544,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m当然\u001b[0m\u001b[32m,\u001b[0m\u001b[32m简\u001b[0m\u001b[32m练的儿歌既能\u001b[0m\u001b[32m吸引小朋友的注意力,又便于他们\u001b[0m\u001b[32m记忆和传唱。请告诉我想要\u001b[0m\u001b[32m体现的主题或者特定的内容提示,我\u001b[0m\u001b[32m将据此创作。例如,如果我们以\u001b[0m\u001b[32m“小星星”为主题,一首简\u001b[0m\u001b[32m练的儿歌可以是:\n", - "\n", - "``\u001b[0m\u001b[32m`\n", - "小星星,亮晶晶,\n", - "\u001b[0m\u001b[32m挂在天空眨眼睛。\n", - "夜幕降临\u001b[0m\u001b[32m它出现,\n", - "伴着宝宝入梦\u001b[0m\u001b[32m乡。\n", - "```\n", - "\n", - "这样的儿歌简单\u001b[0m\u001b[32m易懂,节奏明快,非常适合\u001b[0m\u001b[32m儿童。请提供您的想法或主题\u001b[0m\u001b[32m,我将继续为您定制。\u001b[0m\u001b[32m\u001b[0m\n", + "\u001b[32m小白\u001b[0m\u001b[32m兔\u001b[0m\u001b[32m,\u001b[0m\u001b[32m蹦又跳, \n", + "\u001b[0m\u001b[32m长耳朵,摇啊摇。 \n", + "\u001b[0m\u001b[32m爱吃萝卜和青菜, \n", + "快乐\u001b[0m\u001b[32m生活在林梢。\u001b[0m\u001b[32m\u001b[0m\n", "\n" ] }, { "data": { "text/plain": [ - "[{'role': 'system', 'content': '你是一个专门写儿歌的作家,请根据我的提示写作。我的要求是:'},\n", + "[{'role': 'system', 'content': '你是一个专门写儿歌的作家,请根据我的提示写作。我的要求是:来一首关于兔子的,四句'},\n", " {'role': 'user', 'content': '要简练'},\n", - " {'role': 'assistant', 'content': '小星星,亮晶晶,\\n挂在天空眨眼睛。\\n夜幕降临它出现,\\n伴着宝宝入梦乡。'}]" + " {'role': 'assistant',\n", + " 'content': '小白兔,蹦又跳, \\n长耳朵,摇啊摇。 \\n爱吃萝卜和青菜, \\n快乐生活在林梢。'}]" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -528,8 +571,7 @@ "\n", "qwen = ChatQwen(memory=[\n", " Template(\n", - " template_text=\"你是一个专门写儿歌的作家,请根据我的提示写作。我的要求是:{{question}}\",\n", - " input_mapping={\"question\": \"task\"}\n", + " template_text=\"你是一个专门写儿歌的作家,请根据我的提示写作。我的要求是:{{task}}\"\n", " ),\n", " \"要简练\"\n", "])\n", @@ -539,29 +581,25 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "id": "28a2fde2-a135-4c0a-9794-6d92ad239ee6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'knowledge': set(),\n", - " 'data': {},\n", - " 'task': '来一首关于兔子的,四句',\n", - " 'draft': None,\n", - " 'outline': None,\n", - " 'output': '当然可以,我会尽力创作一首既简练又富有意趣的儿歌。请告诉我想要体现的主题或者特定的元素,比如动物、颜色、数字等信息,我就能开始写作。',\n", - " 'state': {}}" + "{'last_input': None,\n", + " 'last_output': '小白兔,蹦又跳, \\n长耳朵,摇啊摇。 \\n爱吃萝卜和青菜, \\n快乐生活在林梢。',\n", + " 'task': '来一首关于兔子的,四句'}" ] }, - "execution_count": 3, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "qwen.desk" + "qwen.exported_vars" ] }, { @@ -574,10 +612,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m两条\u001b[0m\u001b[32m小\u001b[0m\u001b[32m鱼\u001b[0m\u001b[32m,水中游,\n", - "尾巴\u001b[0m\u001b[32m摇摇,多自由。\n", - "吐个\u001b[0m\u001b[32m泡泡,笑一笑,\n", - "海底世界,\u001b[0m\u001b[32m真奇妙。\u001b[0m\u001b[32m\u001b[0m\n", + "\u001b[32m两条\u001b[0m\u001b[32m小\u001b[0m\u001b[32m鱼\u001b[0m\u001b[32m,水中游, \n", + "\u001b[0m\u001b[32m摇摇尾巴,点点头。 \n", + "相互\u001b[0m\u001b[32m追逐多欢喜, \n", + "波光里\u001b[0m\u001b[32m笑影悠悠。\u001b[0m\u001b[32m\u001b[0m\n", "\n" ] }, @@ -586,10 +624,11 @@ "text/plain": [ "[{'role': 'system', 'content': '你是一个专门写儿歌的作家,请根据我的提示写作。我的要求是:来一首关于兔子的,四句'},\n", " {'role': 'user', 'content': '要简练'},\n", - " {'role': 'assistant', 'content': '小白兔,蹦又跳,\\n胡萝卜,吃个饱。\\n长耳朵,晃呀晃,\\n月光下,回家了。'},\n", + " {'role': 'assistant',\n", + " 'content': '小白兔,蹦又跳, \\n长耳朵,摇啊摇。 \\n爱吃萝卜和青菜, \\n快乐生活在林梢。'},\n", " {'role': 'user', 'content': '换成两条小鱼'},\n", " {'role': 'assistant',\n", - " 'content': '两条小鱼,水中游,\\n尾巴摇摇,多自由。\\n吐个泡泡,笑一笑,\\n海底世界,真奇妙。'}]" + " 'content': '两条小鱼,水中游, \\n摇摇尾巴,点点头。 \\n相互追逐多欢喜, \\n波光里笑影悠悠。'}]" ] }, "execution_count": 9, @@ -598,7 +637,7 @@ } ], "source": [ - "log(qwen, \"换成两条小鱼\")\n", + "qwen(\"换成两条小鱼\")\n", "qwen.memory" ] }, @@ -629,36 +668,28 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m广州天气如何啊?今天是晴天。 \u001b[0m" + "\u001b[32m广州天气如何啊?今天是晴天。 \u001b[0m\n", + "\n" ] - }, - { - "data": { - "text/plain": [ - "''" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ "# 从函数定义工具\n", - "from illufly.agent import Tool, ChatQwen\n", + "from illufly.chat import ChatQwen\n", + "from illufly.types import ToolAgent\n", "from illufly.io import TextBlock, log\n", "\n", "def get_current_weather(location: str):\n", " \"\"\"获取城市的天气情况\"\"\"\n", " yield TextBlock(\"chunk\", f\"{location}今天是晴天。 \")\n", "\n", - "t = Tool(get_current_weather)\n", + "t = ToolAgent(get_current_weather)\n", "log(t, \"广州天气如何啊?\")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 14, "id": "4c7a9028-eee1-4c48-8d83-403970caf409", "metadata": {}, "outputs": [ @@ -668,14 +699,14 @@ "[]" ] }, - "execution_count": 4, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "q = ChatQwen(tools=[Tool(get_current_weather)])\n", - "q.toolkits" + "q = ChatQwen(tools=[ToolAgent(get_current_weather)])\n", + "q.tools" ] }, { @@ -688,7 +719,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "id": "931704ba-5ddc-4cdb-80bb-a6a7377edea0", "metadata": {}, "outputs": [ @@ -696,29 +727,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "[TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"call_12a6ae8e89294c5c928352\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"\"}}\u001b[0m\n", - "[TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"arguments\": \"{\\\"location\\\": \\\"\"}}\u001b[0m\n", - "[TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"arguments\": \"广州\\\"}\"}}\u001b[0m\n", - "[TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {}}\u001b[0m\n", - "[TOOLS_CALL_FINAL] \u001b[36m{\"0\": {\"index\": 0, \"id\": \"call_12a6ae8e89294c5c928352\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"{\\\"location\\\": \\\"广州\\\"}\"}}}\u001b[0m\n" + " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"call_8909e53a21914771b52ab5\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"\"}}\u001b[0m\n", + " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"arguments\": \"{\\\"location\\\": \\\"\"}}\u001b[0m\n", + " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {\"arguments\": \"广州\\\"}\"}}\u001b[0m\n", + " 1s [TOOLS_CALL_CHUNK] \u001b[32m{\"index\": 0, \"id\": \"\", \"type\": \"function\", \"function\": {}}\u001b[0m\n", + " 1s [TOOLS_CALL_FINAL] \u001b[36m{\"0\": {\"index\": 0, \"id\": \"call_8909e53a21914771b52ab5\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"{\\\"location\\\": \\\"广州\\\"}\"}}}\u001b[0m\n", + "\u001b[32m广州今天是晴天。 \u001b[0m\u001b[32m广\u001b[0m\u001b[32m州今天是晴天\u001b[0m\u001b[32m。不过请注意,实际天气可能会有所\u001b[0m\u001b[32m变化,外出时最好还是确认一下\u001b[0m\u001b[32m实时天气。\u001b[0m\u001b[32m\u001b[0m\n", + "\n" ] }, { "data": { "text/plain": [ "[{'role': 'user', 'content': '今天广州天气如何啊'},\n", + " {'role': 'assistant', 'content': ''},\n", + " {'role': 'tool', 'content': '广州今天是晴天。 '},\n", " {'role': 'assistant',\n", - " 'content': '{\"0\": {\"index\": 0, \"id\": \"call_12a6ae8e89294c5c928352\", \"type\": \"function\", \"function\": {\"name\": \"get_current_weather\", \"arguments\": \"{\\\\\"location\\\\\": \\\\\"广州\\\\\"}\"}}}'}]" + " 'content': '广州今天是晴天。不过请注意,实际天气可能会有所变化,外出时最好还是确认一下实时天气。'}]" ] }, - "execution_count": 5, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 在类定义中声明工具\n", - "q = ChatQwen(tools=[Tool(get_current_weather)], exec_tool=False)\n", + "q = ChatQwen(tools=[ToolAgent(get_current_weather)], exec_tool=False)\n", "log(q, \"今天广州天气如何啊\", verbose=True)\n", "q.memory" ] diff --git a/notes/core/runnable.ipynb b/notes/core/runnable.ipynb new file mode 100644 index 00000000..7199b9aa --- /dev/null +++ b/notes/core/runnable.ipynb @@ -0,0 +1,264 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a64e44ae-f59b-4535-bddd-73fb94c212e1", + "metadata": {}, + "source": [ + "# Runnable" + ] + }, + { + "cell_type": "markdown", + "id": "c7c10178-0131-4039-b13c-4ec6bcc1151b", + "metadata": {}, + "source": [ + "## 订阅发布机制\n", + "\n", + "使用 pub/sub 机制实现动运行时的变量传递。" + ] + }, + { + "cell_type": "markdown", + "id": "584d0f44-9701-414d-b79c-310c7e234af2", + "metadata": {}, + "source": [ + "### 在两个实例之间订阅" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ad577e05-1a46-4e42-9864-8e55ee39f614", + "metadata": {}, + "outputs": [], + "source": [ + "from illufly.types import Runnable\n", + "\n", + "class A(Runnable):\n", + " def call(self, prompt: str):\n", + " self._last_input = prompt\n", + " self._last_output = f'你说了 {prompt}'\n", + " print(prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "039e85a6-939e-49bb-91f2-0f5a01ca0795", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hi\n" + ] + }, + { + "data": { + "text/plain": [ + "{'last_input': 'hi', 'last_output': '你说了 hi'}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = A()\n", + "a.call(\"hi\")\n", + "a.exported_vars" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "32827100-b218-42c2-a9c7-c3154089797d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'last_input': 'hi', 'last_output': '你说了 hi'}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b = A()\n", + "b.bind_runnables(a)\n", + "b.imported_vars" + ] + }, + { + "cell_type": "markdown", + "id": "1197e33a-e4d1-47d1-91ce-3259532b4ca9", + "metadata": {}, + "source": [ + "## 使用映射规则" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f612236b-99cd-431f-adb2-5b9fc13eb547", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a1 exported: {'last_input': None, 'last_output': 'hi'}\n", + "a2 exported: {'last_input': None, 'last_output': '我是黑牛'}\n", + "b imported: {'last_input': None, 'a1': 'hi', 'a2': '我是黑牛'}\n" + ] + } + ], + "source": [ + "from illufly.types import Runnable\n", + "\n", + "class A(Runnable):\n", + " def call(self, prompt: str):\n", + " pass\n", + "\n", + "class B(Runnable):\n", + " def call(self):\n", + " pass\n", + "\n", + "a1 = A()\n", + "a2 = A()\n", + "b = B()\n", + "b.bind_runnables(a1, binding_map={\"a1\": \"last_output\"})\n", + "b.bind_runnables(a2, binding_map={\"a2\": \"last_output\"})\n", + "a1._last_output = \"hi\"\n", + "a2._last_output = \"我是黑牛\"\n", + "\n", + "print(\"a1 exported:\", a1.exported_vars)\n", + "print(\"a2 exported:\", a2.exported_vars)\n", + "print(\"b imported:\", b.imported_vars)" + ] + }, + { + "cell_type": "markdown", + "id": "d9788e07-5798-446d-91c5-2dfd7efcbc3c", + "metadata": {}, + "source": [ + "## 在容器实例内订阅" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ba31fbf6-a90a-48b8-b5c7-01bfa65c5eaa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "那小子在说 >> 你好\n" + ] + } + ], + "source": [ + "from illufly.types import Runnable\n", + "\n", + "# 用于被嵌入\n", + "class A(Runnable):\n", + " def call(self, prompt: str):\n", + " self._last_input = prompt\n", + " self._last_output = f'{self.imported_vars[\"last_input\"]} {prompt}'\n", + "\n", + "class B(Runnable):\n", + " def __init__(self, obj: Runnable):\n", + " super().__init__()\n", + " obj.bind_runnables(self)\n", + " self.obj = obj\n", + "\n", + " def call(self, instruction):\n", + " self._last_input = instruction\n", + " self.obj.call(\"你好\")\n", + " print(self.obj.last_output)\n", + "a = A()\n", + "b = B(a)\n", + "b.call(\"那小子在说 >> \")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0ef442ff-62b6-439e-b8c7-2c8b4e453333", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'last_input': '那小子在说 >> ', 'last_output': None}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b.exported_vars" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "9fcd9e1b-cd69-4b69-bcad-f4972864f94e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'last_input': '你好', 'last_output': '那小子在说 >> 你好'}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.exported_vars" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "daed1163-47bf-4e62-8d78-35759ef61535", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "textlong-same-ipykernel", + "language": "python", + "name": "textlong-same-ipykernel" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notes/core/template.ipynb b/notes/core/template.ipynb new file mode 100644 index 00000000..ac63d26c --- /dev/null +++ b/notes/core/template.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "472d7585-44b2-4ef0-9fa8-ed05ab445e5f", + "metadata": {}, + "source": [ + "# 提示语模板" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9ad9f371-60e9-4b56-93b0-739eaed5592e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "