
Can the kgextract pipeline continue from a cache/checkpoint? #105

Open
pecanjk opened this issue Dec 6, 2024 · 2 comments

Comments

pecanjk commented Dec 6, 2024

If an error happens partway through the pipeline, can it resume from a cache?

thundax-lyp (Contributor) commented:

This requirement is a bit involved. You can take the same approach as GraphRAG and cache on the LLM side. If you are building with the indexes.py from the examples, you can refer to the following code.

import hashlib
import json
import logging
import os

import kag.common.llm.client
from kag.common.llm.client import OpenAIClient

logger = logging.getLogger(__name__)

# Root directory for the on-disk LLM cache; adjust to your environment.
LLM_CACHE_DIR = os.getenv('LLM_CACHE_DIR', './llm_cache')


class CacheableOpenAIClient(OpenAIClient):
    def __call__(self, prompt: str, image_url: str = None):
        # Only cache plain-text prompts; pass multimodal calls straight through.
        if image_url is not None:
            return super().__call__(prompt, image_url)

        cache_filename = self.get_cache_filename(prompt)
        response = None
        if os.path.exists(cache_filename):
            try:
                with open(cache_filename, 'r', encoding='utf-8') as f:
                    data: dict = json.load(f)
                response = data.get('response')
            except Exception as e:
                # Corrupt cache entry: log it and delete the file so it gets rebuilt.
                logger.warning(f'Failed to load cache file {cache_filename}: {e}')
                try:
                    os.unlink(cache_filename)
                except OSError:
                    pass

        if response:
            # Cache hit: touch the file so its mtime reflects the last use.
            os.utime(cache_filename, None)
            return response

        response = super().__call__(prompt, image_url)

        if response:
            os.makedirs(os.path.dirname(cache_filename), exist_ok=True)
            try:
                with open(cache_filename, 'w', encoding='utf-8') as f:
                    f.write(json.dumps({
                        'request': {
                            'model': self.model, 'prompt': prompt, 'temperature': self.temperature
                        },
                        'response': response
                    }, ensure_ascii=False, indent=4))
            except Exception as e:
                logger.warning(f'Failed to write cache file {cache_filename}: {e}')

        return response

    def get_cache_filename(self, prompt: str):
        # Shard cache files by the first two hex digits of the prompt's MD5 hash.
        hash_code = hashlib.md5(prompt.encode('utf-8')).hexdigest()
        return os.path.join(LLM_CACHE_DIR, self.model, hash_code[:2], f'{hash_code}.json')


# Monkey-patch the client so the rest of the pipeline picks up the cached version.
kag.common.llm.client.OpenAIClient = CacheableOpenAIClient
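
As a rough usage sketch: import the module containing the patch before the pipeline builds its LLM client, then rerun your indexing script after a failure; prompts already answered are replayed from LLM_CACHE_DIR and only the remaining ones hit the API. The module name cacheable_client and the run_indexing() entry point below are hypothetical placeholders, standing in for wherever you put the patch and for whatever your indexes.py actually invokes.

# rerun_indexes.py -- minimal sketch, assuming the patch above lives in cacheable_client.py
import cacheable_client  # noqa: F401  (importing applies the monkey-patch)

from indexes import run_indexing  # hypothetical entry point of the example script

if __name__ == '__main__':
    # Rerunning after a crash replays cached LLM responses and only
    # re-issues the prompts that never completed.
    run_indexing()

The key design point is that the cache sits at the LLM-call boundary rather than at the pipeline level, so no explicit checkpoint format is needed: resuming is just rerunning.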

caszkgui (Collaborator) commented Dec 6, 2024

(quoting @thundax-lyp's reply and the CacheableOpenAIClient code above)

We have already added this issue to our todo list. Could you contact us through the WeChat group to help speed up progress?
