Skip to content

Commit

Permalink
Merge of #9462
Browse files Browse the repository at this point in the history
  • Loading branch information
mergify[bot] authored Dec 23, 2024
2 parents d9c56db + 35618a6 commit ad50327
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 72 deletions.
2 changes: 2 additions & 0 deletions apps/app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,11 @@
"reconnecting-websocket": "^4.4.0",
"redis": "^3.0.2",
"rehype-katex": "^7.0.1",
"rehype-meta": "^4.0.1",
"rehype-raw": "^7.0.0",
"rehype-sanitize": "^6.0.0",
"rehype-slug": "^6.0.0",
"rehype-stringify": "^10.0.1",
"rehype-toc": "^3.0.2",
"remark-breaks": "^4.0.0",
"remark-directive": "^3.0.0",
Expand Down
15 changes: 8 additions & 7 deletions apps/app/src/features/openai/server/services/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import assert from 'node:assert';
import { Readable, Transform } from 'stream';
import { pipeline } from 'stream/promises';

import type { IPagePopulatedToShowRevision } from '@growi/core';
import { PageGrant, isPopulated } from '@growi/core';
import type { HydratedDocument, Types } from 'mongoose';
import mongoose from 'mongoose';
Expand All @@ -20,7 +21,7 @@ import { createBatchStream } from '~/server/util/batch-stream';
import loggerFactory from '~/utils/logger';

import { OpenaiServiceTypes } from '../../interfaces/ai';
import { sanitizeMarkdown } from '../utils/sanitize-markdown';
import { convertMarkdownToHtml } from '../utils/convert-markdown-to-html';

import { getClient } from './client-delegator';
// import { splitMarkdownIntoChunks } from './markdown-splitter/markdown-token-splitter';
Expand Down Expand Up @@ -157,9 +158,9 @@ class OpenaiService implements IOpenaiService {
// }
// }

private async uploadFile(pageId: Types.ObjectId, body: string): Promise<OpenAI.Files.FileObject> {
const sanitizedMarkdown = await sanitizeMarkdown(body);
const file = await toFile(Readable.from(sanitizedMarkdown), `${pageId}.md`);
private async uploadFile(pageId: Types.ObjectId, pagePath: string, revisionBody: string): Promise<OpenAI.Files.FileObject> {
const convertedHtml = await convertMarkdownToHtml({ pagePath, revisionBody });
const file = await toFile(Readable.from(convertedHtml), `${pageId}.html`);
const uploadedFile = await this.client.uploadFile(file);
return uploadedFile;
}
Expand All @@ -183,17 +184,17 @@ class OpenaiService implements IOpenaiService {
async createVectorStoreFile(pages: Array<HydratedDocument<PageDocument>>): Promise<void> {
const vectorStore = await this.getOrCreateVectorStoreForPublicScope();
const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();
const processUploadFile = async(page: PageDocument) => {
const processUploadFile = async(page: HydratedDocument<PageDocument>) => {
if (page._id != null && page.grant === PageGrant.GRANT_PUBLIC && page.revision != null) {
if (isPopulated(page.revision) && page.revision.body.length > 0) {
const uploadedFile = await this.uploadFile(page._id, page.revision.body);
const uploadedFile = await this.uploadFile(page._id, page.path, page.revision.body);
prepareVectorStoreFileRelations(vectorStore._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
return;
}

const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
const uploadedFile = await this.uploadFile(page._id, pagePopulatedToShowRevision.revision.body);
const uploadedFile = await this.uploadFile(page._id, page.path, pagePopulatedToShowRevision.revision.body);
prepareVectorStoreFileRelations(vectorStore._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import { dynamicImport } from '@cspell/dynamic-import';
import type { Root, Code } from 'mdast';
import type * as RehypeMeta from 'rehype-meta';
import type * as RehypeStringify from 'rehype-stringify';
import type * as RemarkParse from 'remark-parse';
import type * as RemarkRehype from 'remark-rehype';
import type * as Unified from 'unified';
import type * as UnistUtilVisit from 'unist-util-visit';

/**
 * Shape of the module-level cache that holds the functions obtained through
 * `dynamicImport`. Every field is optional because the cache starts out empty
 * and is only filled once `initializeModules` has completed.
 *
 * NOTE(review): the packages are loaded dynamically presumably because they
 * are ESM-only while this file runs as CommonJS (it uses `__dirname`) — confirm.
 */
interface ModuleCache {
// `unified` processor factory (named export of 'unified')
unified?: typeof Unified.unified;
// tree traversal helper (named export of 'unist-util-visit')
visit?: typeof UnistUtilVisit.visit;
// default export of 'remark-parse'
remarkParse?: typeof RemarkParse.default;
// default export of 'remark-rehype'
remarkRehype?: typeof RemarkRehype.default;
// default export of 'rehype-meta'
rehypeMeta?: typeof RehypeMeta.default;
// default export of 'rehype-stringify'
rehypeStringify?: typeof RehypeStringify.default;
}

// Cache shared by all calls in this module; reassigned as a whole by `initializeModules`.
let moduleCache: ModuleCache = {};

/**
 * Populates `moduleCache` with the functions needed by the markdown-to-HTML
 * pipeline.
 *
 * Returns immediately when every cache entry is already set. Otherwise all six
 * packages are requested through `dynamicImport` in parallel and the cache is
 * replaced with the freshly loaded functions.
 */
const initializeModules = async(): Promise<void> => {
  // Fixed-length list of every entry that must be present before we can skip loading
  const cachedEntries = [
    moduleCache.unified,
    moduleCache.visit,
    moduleCache.remarkParse,
    moduleCache.remarkRehype,
    moduleCache.rehypeMeta,
    moduleCache.rehypeStringify,
  ];
  const isFullyLoaded = cachedEntries.every(entry => entry != null);
  if (isFullyLoaded) {
    return;
  }

  // Start all imports before awaiting so that they resolve concurrently
  const pendingImports = Promise.all([
    dynamicImport<typeof Unified>('unified', __dirname),
    dynamicImport<typeof UnistUtilVisit>('unist-util-visit', __dirname),
    dynamicImport<typeof RemarkParse>('remark-parse', __dirname),
    dynamicImport<typeof RemarkRehype>('remark-rehype', __dirname),
    dynamicImport<typeof RehypeMeta>('rehype-meta', __dirname),
    dynamicImport<typeof RehypeStringify>('rehype-stringify', __dirname),
  ]);

  const [
    unifiedModule,
    unistUtilVisitModule,
    remarkParseModule,
    remarkRehypeModule,
    rehypeMetaModule,
    rehypeStringifyModule,
  ] = await pendingImports;

  // 'unified' and 'unist-util-visit' are read via named exports, the plugins via their default export
  moduleCache = {
    unified: unifiedModule.unified,
    visit: unistUtilVisitModule.visit,
    remarkParse: remarkParseModule.default,
    remarkRehype: remarkRehypeModule.default,
    rehypeMeta: rehypeMetaModule.default,
    rehypeStringify: rehypeStringifyModule.default,
  };
};

/**
 * Converts the markdown body of a page revision into an HTML string.
 *
 * The body is parsed with remark, fenced code blocks whose language is
 * `drawio` have their content replaced by a placeholder, the tree is converted
 * with remark-rehype, `pagePath` is handed to rehype-meta as the `title`
 * option, and the result is serialized with rehype-stringify.
 *
 * @param pagePath path of the page; passed to rehype-meta as `title`
 * @param revisionBody markdown source to convert
 * @returns the serialized output of the pipeline
 * @throws Error when any required module is still missing from the cache after initialization
 */
export const convertMarkdownToHtml = async({ pagePath, revisionBody }: { pagePath: string, revisionBody: string }): Promise<string> => {
  await initializeModules();

  const {
    unified,
    visit,
    remarkParse,
    remarkRehype,
    rehypeMeta,
    rehypeStringify,
  } = moduleCache;

  // Guard also narrows the optional cache entries for the code below
  if (!(unified != null && visit != null && remarkParse != null && remarkRehype != null && rehypeMeta != null && rehypeStringify != null)) {
    throw new Error('Failed to initialize required modules');
  }

  // remark plugin: swaps the content of drawio code blocks for a placeholder
  const replaceDrawioContent = () => (tree: Root) => {
    visit(tree, 'code', (node: Code) => {
      if (node.lang !== 'drawio') {
        return;
      }
      node.value = '<!-- drawio content replaced -->';
    });
  };

  // markdown (mdast) stage
  const markdownStage = unified()
    .use(remarkParse)
    .use(replaceDrawioContent);

  // HTML (hast) stage and serialization
  const htmlStage = markdownStage
    .use(remarkRehype)
    .use(rehypeMeta, { title: pagePath })
    .use(rehypeStringify);

  // Processing is done synchronously, as in the pipeline's original form
  const renderedFile = htmlStage.processSync(revisionBody);
  return renderedFile.toString();
};
65 changes: 0 additions & 65 deletions apps/app/src/features/openai/server/utils/sanitize-markdown.ts

This file was deleted.

69 changes: 69 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit ad50327

Please sign in to comment.