
Can the kgextract pipeline continue from a cache/checkpoint? #105

Open
pecanjk opened this issue Dec 6, 2024 · 2 comments

Comments

pecanjk commented Dec 6, 2024

If an error happens partway through the pipeline, can it resume from a cache?

thundax-lyp (Contributor) commented:

This requirement is a bit involved. You can take the same approach as GraphRAG and cache on the LLM side. If you are building with the indexes.py from the examples, you can refer to the following code.

import hashlib
import json
import logging
import os

import kag.common.llm.client
from kag.common.llm.client import OpenAIClient

logger = logging.getLogger(__name__)

# Root directory for the on-disk LLM cache; adjust to your environment.
LLM_CACHE_DIR = os.getenv('LLM_CACHE_DIR', './llm_cache')


class CacheableOpenAIClient(OpenAIClient):
    def __call__(self, prompt: str, image_url: str = None):
        # Only cache plain-text prompts; pass multimodal calls straight through.
        if image_url is not None:
            return super().__call__(prompt, image_url)

        cache_filename = self.get_cache_filename(prompt)
        response = None
        if os.path.exists(cache_filename):
            try:
                with open(cache_filename, 'r', encoding='utf-8') as f:
                    data: dict = json.load(f)
                response = data.get('response')
            except Exception as e:
                # Corrupt cache entry: log it and delete the file so it gets rebuilt.
                logger.warning(f'Failed to load cache file {cache_filename}: {e}')
                try:
                    os.unlink(cache_filename)
                except OSError:
                    pass

        if response:
            # Cache hit: touch the file so its mtime reflects the last use.
            os.utime(cache_filename, None)
            return response

        response = super().__call__(prompt, image_url)

        if response:
            os.makedirs(os.path.dirname(cache_filename), exist_ok=True)
            try:
                with open(cache_filename, 'w', encoding='utf-8') as f:
                    f.write(json.dumps({
                        'request': {
                            'model': self.model, 'prompt': prompt, 'temperature': self.temperature
                        },
                        'response': response
                    }, ensure_ascii=False, indent=4))
            except Exception as e:
                logger.warning(f'Failed to write cache file {cache_filename}: {e}')

        return response

    def get_cache_filename(self, prompt: str):
        # Shard cache files by the first two hex digits of the prompt's MD5 hash.
        hash_code = hashlib.md5(prompt.encode('utf-8')).hexdigest()
        return os.path.join(LLM_CACHE_DIR, self.model, hash_code[:2], f'{hash_code}.json')


# Monkey-patch the client so the rest of the pipeline picks up the cached version.
kag.common.llm.client.OpenAIClient = CacheableOpenAIClient
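
As a rough usage sketch: import the module containing the patch before the pipeline builds its LLM client, then rerun your indexing script after a failure; prompts already answered are replayed from LLM_CACHE_DIR and only the remaining ones hit the API. The module name cacheable_client and the run_indexing() entry point below are hypothetical placeholders, standing in for wherever you put the patch and for whatever your indexes.py actually invokes.

# rerun_indexes.py -- minimal sketch, assuming the patch above lives in cacheable_client.py
import cacheable_client  # noqa: F401  (importing applies the monkey-patch)

from indexes import run_indexing  # hypothetical entry point of the example script

if __name__ == '__main__':
    # Rerunning after a crash replays cached LLM responses and only
    # re-issues the prompts that never completed.
    run_indexing()

The key design point is that the cache sits at the LLM-call boundary rather than at the pipeline level, so no explicit checkpoint format is needed: resuming is just rerunning.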

caszkgui (Collaborator) commented Dec 6, 2024

(quoting @thundax-lyp's reply and the CacheableOpenAIClient code above)

We have already added this issue to our todo list. Could you contact us through the WeChat group to help speed up progress?
