Skip to content

Commit

Permalink
Add fetch doc nodes cache for speed up download.
Browse files Browse the repository at this point in the history
  • Loading branch information
huacnlee committed Nov 13, 2023
1 parent b45c156 commit 458ba1f
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 23 deletions.
19 changes: 10 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,16 @@ yarn add feishu-pages

> 如果你想简单一些,也可以用 `.env` 文件来配置环境变量,注意避免 `FEISHU_APP_SECRET` 泄露到互联网。
| Name | Description | Required | Default |
| ------------------- | ----------------------------------------------------------------- | -------- | ---------------------- |
| `FEISHU_ENDPOINT` | 飞书 API 节点,如用 LarkSuite 可以通过这个配置 API 地址 | NO | https://open.feishu.cn |
| `FEISHU_APP_ID` | 飞书应用 ID | YES | |
| `FEISHU_APP_SECRET` | 飞书应用 Secret | YES | |
| `FEISHU_SPACE_ID` | 飞书知识库 ID | YES | |
| `OUTPUT_DIR` | 输出目录 | NO | `./dist` |
| `ROOT_NODE_TOKEN` | 根节点,导出节点以下(不含此节点)的所有内容。 | NO | |
| `BASE_URL` | 自定义文档里面相关文档输出的 URL 前缀,例如:`/docs/`,默认为 `/`,建议采用完整 URL 避免相对路径的各类问题。 | NO | `/` |
| Name | Description | Required | Default |
| ------------------- | ------------------------------------------------------------------------------------------------------------ | -------- | ---------------------- |
| `FEISHU_ENDPOINT` | 飞书 API 节点,如用 LarkSuite 可以通过这个配置 API 地址 | NO | https://open.feishu.cn |
| `FEISHU_APP_ID` | 飞书应用 ID | YES | |
| `FEISHU_APP_SECRET` | 飞书应用 Secret | YES | |
| `FEISHU_SPACE_ID` | 飞书知识库 ID | YES | |
| `OUTPUT_DIR` | 输出目录 | NO | `./dist` |
| `ROOT_NODE_TOKEN` | 根节点,导出节点以下(不含此节点)的所有内容。 | NO | |
| `BASE_URL` | 自定义文档里面相关文档输出的 URL 前缀,例如:`/docs/`,默认为 `/`,建议采用完整 URL 避免相对路径的各类问题。 | NO | `/` |
| `ROOT_NODE_TOKEN` | 从哪个节点 (node_token) 开始导出,例如:`6992046856314306562`,默认为空,走根节点开始。 | NO | |

## Usage

Expand Down
46 changes: 36 additions & 10 deletions feishu-pages/src/doc.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import { MarkdownRenderer } from 'feishu-docx';
import { Doc, feishuFetchWithIterator } from './feishu';
import fs from 'fs';
import path from 'path';
import { CACHE_DIR, Doc, feishuFetchWithIterator } from './feishu';

/**
* Fetch doc content
* https://open.feishu.cn/document/server-docs/docs/docs/docx-v1/document/raw_content
* @param fileDoc doc node; its obj_token is used as the document_id and its obj_edit_time is compared against the cached blocks
* @returns
*/
export const fetchDocBody = async (document_id: string) => {
console.info('Fetching doc: ', document_id, '...');
export const fetchDocBody = async (fileDoc: Doc) => {
let document_id = fileDoc.obj_token;

const doc = {
document: {
Expand All @@ -17,14 +19,38 @@ export const fetchDocBody = async (document_id: string) => {
blocks: [],
};

doc.blocks = await feishuFetchWithIterator(
'GET',
`/open-apis/docx/v1/documents/${document_id}/blocks`,
{
page_size: 500,
document_revision_id: -1,
const fetchDocBlocks = async (document_id: string) => {
// Check cache in ${CACHE_DIR}/blocks/${document_id}.json; reuse it only when obj_edit_time is unchanged
let cacheBlocks = path.join(CACHE_DIR, 'blocks', document_id + '.json');
fs.mkdirSync(path.dirname(cacheBlocks), { recursive: true });
if (fs.existsSync(cacheBlocks)) {
const doc = JSON.parse(fs.readFileSync(cacheBlocks, 'utf-8'));
if (doc?.obj_edit_time === fileDoc.obj_edit_time) {
console.info('Cache hit doc: ', document_id, '...');
return doc.blocks;
}
}
);

console.info('Fetching doc: ', document_id, '...');
const blocks = await feishuFetchWithIterator(
'GET',
`/open-apis/docx/v1/documents/${document_id}/blocks`,
{
page_size: 500,
document_revision_id: -1,
}
);
fs.writeFileSync(
cacheBlocks,
JSON.stringify({
obj_edit_time: fileDoc.obj_edit_time,
blocks,
})
);
return blocks;
};

doc.blocks = await fetchDocBlocks(document_id);

const render = new MarkdownRenderer(doc as any);
const content = render.parse();
Expand Down
12 changes: 11 additions & 1 deletion feishu-pages/src/feishu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ axios.interceptors.response.use(
return response;
},
async (error) => {
const { headers, data } = error.response;
const { headers, data, status } = error.response;

// Rate Limit code: 99991400, delay to retry
if (data?.code === 99991400) {
Expand All @@ -165,6 +165,8 @@ axios.interceptors.response.use(
return await axios.request(error.config);
}

console.error('fetch error:', error);

throw error;
}
);
Expand Down Expand Up @@ -244,6 +246,14 @@ export const feishuDownload = async (fileToken: string, localPath: string) => {
.catch((err) => {
const { message } = err;
console.error(' -> Failed to download image:', fileToken, message);
// If status is 403
// https://open.feishu.cn/document/server-docs/docs/drive-v1/faq#6e38a6de
if (message.includes('403')) {
console.error(
`无文件下载权限时接口将返回 403 的 HTTP 状态码。\nhttps://open.feishu.cn/document/server-docs/docs/drive-v1/faq#6e38a6de\nhttps://open.feishu.cn/document/server-docs/docs/drive-v1/download/download`
);
return null;
}
});
}

Expand Down
5 changes: 2 additions & 3 deletions feishu-pages/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import {
fetchTenantAccessToken,
} from './feishu';
import { FileDoc, generateSummary, prepareDocSlugs } from './summary';
import { humanizeFileSize, cleanupDocsForJSON } from './utils';
import { cleanupDocsForJSON, humanizeFileSize } from './utils';
import { fetchAllDocs } from './wiki';

// App entry
Expand Down Expand Up @@ -52,11 +52,10 @@ import { fetchAllDocs } from './wiki';
);
})();


const fetchDocBodies = async (docs: FileDoc[]) => {
for (let idx = 0; idx < docs.length; idx++) {
const doc = docs[idx];
const { content, fileTokens, meta } = await fetchDocBody(doc.obj_token);
const { content, fileTokens, meta } = await fetchDocBody(doc);

doc.content = content;
doc.meta = meta;
Expand Down

0 comments on commit 458ba1f

Please sign in to comment.