diff --git a/.eslintrc.js b/.eslintrc.js index 5bb4b16..77ad026 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -23,5 +23,7 @@ module.exports = { // We import from the default theme but its not a dep 'import/no-extraneous-dependencies': 'off', + 'unicorn/prefer-ternary': 'off', + 'unicorn/no-abusive-eslint-disable': 'off', }, }; diff --git a/packages/plugin/package.json b/packages/plugin/package.json index c7e76cf..0c59ff5 100644 --- a/packages/plugin/package.json +++ b/packages/plugin/package.json @@ -1,6 +1,6 @@ { "name": "@apify/docusaurus-plugin-typedoc-api", - "version": "4.2.2", + "version": "4.2.3-0", "description": "Docusaurus plugin that provides source code API documentation powered by TypeDoc. ", "keywords": [ "docusaurus", @@ -48,7 +48,8 @@ "@vscode/codicons": "^0.0.35", "marked": "^9.1.6", "marked-smartypants": "^1.1.5", - "typedoc": "^0.25.7" + "typedoc": "^0.25.7", + "zx": "^8.1.4" }, "devDependencies": { "@docusaurus/module-type-aliases": "^3.5.2", @@ -56,5 +57,6 @@ "react": "^18.2.0", "react-dom": "^18.2.0", "typescript": "^5.3.3" - } + }, + "stableVersion": "4.2.2" } \ No newline at end of file diff --git a/packages/plugin/src/index.ts b/packages/plugin/src/index.ts index 0863382..fc6cf07 100644 --- a/packages/plugin/src/index.ts +++ b/packages/plugin/src/index.ts @@ -13,6 +13,7 @@ import { generateJson, loadPackageJsonAndDocs, } from './plugin/data'; +import { generateJsonFromPythonProject } from './plugin/python-generator'; import { extractSidebar } from './plugin/sidebar'; import { getVersionedDocsDirPath, readVersionsMetadata } from './plugin/version'; import type { @@ -55,6 +56,7 @@ const DEFAULT_OPTIONS: Required = { remarkPlugins: [], rehypePlugins: [], versions: {}, + python: false, }; async function importFile(file: string): Promise { @@ -147,12 +149,19 @@ export default function typedocApiPlugin( console.log(`[${prefix}]:`, 'Generating docs...'); - await generateJson( - projectRoot, - entryPoints, - path.join(outDir, 'api-typedoc.json'), - options, - ); + if (options.python) { + await generateJsonFromPythonProject({ + projectRoot, + outFile: path.join(outDir, 'api-typedoc.json'), + }); + } else { + await generateJson( + projectRoot, + entryPoints, + path.join(outDir, 'api-typedoc.json'), + options, + ); + } console.log(`[${prefix}]:`, 'Persisting packages...'); @@ -198,6 +207,11 @@ export default function typedocApiPlugin( fs.mkdirSync(context.generatedFilesDir, { recursive: true }); } fs.copyFileSync(options.pathToCurrentVersionTypedocJSON, outFile); + } else if (options.python) { + await generateJsonFromPythonProject({ + projectRoot, + outFile, + }); } else { await generateJson(projectRoot, entryPoints, outFile, options); } diff --git a/packages/plugin/src/plugin/python-generator/index.ts b/packages/plugin/src/plugin/python-generator/index.ts new file mode 100644 index 0000000..2a665e4 --- /dev/null +++ b/packages/plugin/src/plugin/python-generator/index.ts @@ -0,0 +1,15 @@ +import { parseWithPydocMarkdown } from "./pydoc-markdown"; +import { pydocToTypedoc } from "./transform-docs"; + +export async function generateJsonFromPythonProject({ + outFile, + projectRoot, +} : { outFile: string, projectRoot: string }): Promise { + const pydocJson = await parseWithPydocMarkdown({ projectRoot }); + + await pydocToTypedoc({ + moduleName: 'python', // TODO: get from project config files or passed options + outFile, + pydocJson, + }); +} diff --git a/packages/plugin/src/plugin/python-generator/pydoc-markdown.ts b/packages/plugin/src/plugin/python-generator/pydoc-markdown.ts new file mode 100644 index 0000000..3d39f2e --- /dev/null +++ b/packages/plugin/src/plugin/python-generator/pydoc-markdown.ts @@ -0,0 +1,65 @@ +import { rmSync, writeFileSync } from 'fs'; +import path from 'path'; +import { $ } from 'zx'; + +/** + * Generates the pydoc-markdown configuration file + * @returns The pydoc-markdown configuration file as a string + */ +function getConfigYml({ + projectRoot +}: { projectRoot: string }): string { + return ` +loaders: + - type: python + search_path: ["${projectRoot}"] +processors: + - type: filter + skip_empty_modules: true + - type: crossref +renderer: + type: docusaurus + docs_base_path: docs + relative_output_path: reference + relative_sidebar_path: sidebar.json + sidebar_top_level_label: null +` +} + +export async function parseWithPydocMarkdown({ + projectRoot, +}: { + projectRoot: string, +} +): Promise { + // Check whether the user has Python and pydoc-markdown installed + for (const cmd of ['python', 'pydoc-markdown']) { + try { + // eslint-disable-next-line no-await-in-loop + await $`${cmd} --version`; + } catch { + throw new Error(`Please install ${cmd} to use this plugin with Python projects.`); + } + }; + + // Generate the JSON file + try { + const configYml = getConfigYml({ projectRoot }); + const configPath = path.join(__dirname, 'pydoc-markdown.temp.yml'); + writeFileSync(configPath, configYml); + + const pydoc = await $`pydoc-markdown --quiet --dump ${configPath}`; + + rmSync(configPath); + + let json = await pydoc.text(); + + json = json.replaceAll(path.resolve(projectRoot), 'REPO_ROOT_PLACEHOLDER'); + + return json; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (error: any) { + // eslint-disable-next-line + throw new Error(`Failed to generate JSON file from Python project:\n\t${error.stderr.split('\n').slice(-2).join('\n')}`); + } +} diff --git a/packages/plugin/src/plugin/python-generator/transform-docs.ts b/packages/plugin/src/plugin/python-generator/transform-docs.ts new file mode 100644 index 0000000..1ca22d5 --- /dev/null +++ b/packages/plugin/src/plugin/python-generator/transform-docs.ts @@ -0,0 +1,417 @@ +/* eslint-disable */ +import fs from 'fs'; +import { $ } from 'zx'; + +const REPO_ROOT_PLACEHOLDER = 'REPO_ROOT_PLACEHOLDER'; + +// TODO: Make these parametrizable (gitRepoUrls array option) +const APIFY_CLIENT_REPO_URL = 'https://github.com/apify/apify-client-python'; +const APIFY_SDK_REPO_URL = 'https://github.com/apify/apify-sdk-python'; +const APIFY_SHARED_REPO_URL = 'https://github.com/apify/apify-shared-python'; +const CRAWLEE_PYTHON_REPO_URL = 'https://github.com/apify/crawlee-python'; + +const REPO_URL_PER_PACKAGE = { + 'apify': APIFY_SDK_REPO_URL, + 'apify_client': APIFY_CLIENT_REPO_URL, + 'apify_shared': APIFY_SHARED_REPO_URL, + 'crawlee': CRAWLEE_PYTHON_REPO_URL, +} as const; + +const TAG_PER_PACKAGE: Record = {}; +let MODULE_SHORTCUTS: Record = {}; + +async function initPackageTags({ + moduleName +}: { + moduleName?: string, +}) { + // For each package, get the installed version, and set the tag to the corresponding version + for (const pkg of ['apify', 'apify_client', 'apify_shared']) { + try { + const packageVersion = await $`python -c 'import ${pkg}; print(${pkg}.__version__)'`; + if (packageVersion.exitCode === 0) { + TAG_PER_PACKAGE[pkg] = `v${await packageVersion.text()}`; + } + } catch (e) { + console.warn(`Failed to get version of package ${pkg}`); + } + } + + if(!moduleName) { + const thisPackagePyprojectToml = fs.readFileSync('../pyproject.toml', 'utf8'); + moduleName = thisPackagePyprojectToml.match(/^name = "(.+)"$/m)?.[1]; + } + + // For the current package, set the tag to 'master' + TAG_PER_PACKAGE[moduleName!] = 'master'; + + return TAG_PER_PACKAGE; +} + +async function initModuleShortcuts() { + if(!fs.existsSync('./module_shortcuts.json')) { + return console.warn('No module_shortcuts.json file found, skipping module shortcuts.'); + } + + MODULE_SHORTCUTS = JSON.parse(fs.readFileSync('./module_shortcuts.json', 'utf8')); +} + +// Taken from https://github.com/TypeStrong/typedoc/blob/v0.23.24/src/lib/models/reflections/kind.ts, modified +const TYPEDOC_KINDS = { + 'class': { + kind: 128, + kindString: 'Class', + }, + 'function': { + kind: 2048, + kindString: 'Method', + }, + 'data': { + kind: 1024, + kindString: 'Property', + }, + 'enum': { + kind: 8, + kindString: 'Enumeration', + }, + 'enumValue': { + kind: 16, + kindString: 'Enumeration Member', + }, +} + +const GROUP_ORDER = [ + 'Main Classes', + 'Helper Classes', + 'Errors', + 'Constructors', + 'Methods', + 'Properties', + 'Constants', + 'Enumeration Members' +] as const; + +const groupSort = (g1: typeof GROUP_ORDER[number], g2: typeof GROUP_ORDER[number]) => { + if(GROUP_ORDER.includes(g1) && GROUP_ORDER.includes(g2)){ + return GROUP_ORDER.indexOf(g1) - GROUP_ORDER.indexOf(g2) + } + return g1.localeCompare(g2); +}; + +function getGroupName(object: any) { + const groupPredicates: Record< + typeof GROUP_ORDER[number], + (object: any) => boolean + > = { + 'Errors': (x) => x.name.toLowerCase().includes('error'), + 'Main Classes': (x) => ['Dataset', 'KeyValueStore', 'RequestQueue'].includes(x.name) || x.name.endsWith('Crawler'), + 'Helper Classes': (x) => x.kindString === 'Class', + 'Methods': (x) => x.kindString === 'Method', + 'Constructors': (x) => x.kindString === 'Constructor', + 'Properties': (x) => x.kindString === 'Property', + 'Constants': (x) => x.kindString === 'Enumeration', + 'Enumeration Members': (x) => x.kindString === 'Enumeration Member', + }; + + const [group] = Object.entries(groupPredicates).find( + ([_, predicate]) => predicate(object) + )!; + + return group; +} + +// Strips the Optional[] type from the type string, and replaces generic types with just the main type +function getBaseType(type: any) { + return type?.replace(/Optional\[(.*)\]/g, '$1').replace('ListPage[Dict]', 'ListPage'); +} + +// Returns whether a type is a custom class, or a primitive type +function isCustomClass(type: string) { + return !['dict', 'list', 'str', 'int', 'float', 'bool'].includes(type.toLowerCase()); +} + +// Infer the Typedoc type from the docspec type +function inferTypedocType(docspecType: any): Record | undefined { + const typeWithoutOptional = getBaseType(docspecType); + if (!typeWithoutOptional) { + return undefined; + } + + // Typically, if a type is a custom class, it will be a reference in Typedoc + return isCustomClass(typeWithoutOptional) ? { + type: 'reference', + name: docspecType + } : { + type: 'intrinsic', + name: docspecType, + } +} + +// Sorts the groups of a Typedoc member, and sorts the children of each group +function sortChildren(typedocMember: any) { + for (let group of typedocMember.groups) { + group.children + .sort((a: any, b: any) => { + const firstName = typedocMember.children.find((x: any) => x.id === a).name; + const secondName = typedocMember.children.find((x: any) => x.id === b).name; + return firstName.localeCompare(secondName); + }); + } + typedocMember.groups.sort((a: { title: typeof GROUP_ORDER[number] }, b: { title: typeof GROUP_ORDER[number] }) => groupSort(a.title, b.title)); +} + +// Parses the arguments and return value description of a method from its docstring +function extractArgsAndReturns(docstring: string) { + const parameters = (docstring + .split('Args:')[1] ?? '').split('Returns:')[0] // Get the part between Args: and Returns: + .split(/(^|\n)\s*([\w]+)\s*\(.*?\)\s*:\s*/) // Magic regex which splits the arguments into an array, and removes the argument types + .filter(x => x.length > 1) // Remove empty strings + .reduce((acc, curr, idx, arr) => { // Collect the argument names and types into an object + if(idx % 2 === 0){ + return {...acc, [curr]: arr[idx+1]} // If the index is even, the current string is an argument name, and the next string is its type + } + return acc; + }, {} as Record); + + const returns = (docstring + .split('Returns:')[1] ?? '').split('Raises:')[0] // Get the part between Returns: and Raises: + .split(':')[1]?.trim() || undefined; // Split the return value into its type and description, return description + + + return { parameters, returns }; +} + +// Objects with decorators named 'ignore_docs' or with empty docstrings will be ignored +function isHidden(member: any) { + return member.decorations?.some((d: { name: string }) => d.name === 'ignore_docs') || member.name === 'ignore_docs'; +} + +// Each object in the Typedoc structure has an unique ID, +// we'll just increment it for each object we convert +let oid = 1; + +const symbolIdMap: { qualifiedName: string, sourceFileName: string }[] = []; + +// Converts a docspec object to a Typedoc object, including all its children +function convertObject(obj: any, parent: any, module: any) { + const rootModuleName: string = module.name.split('.')[0]; + for (let member of obj.members ?? []) { + let typedocKind = TYPEDOC_KINDS[member.type as keyof typeof TYPEDOC_KINDS]; + + if(member.bases?.includes('Enum')) { + typedocKind = TYPEDOC_KINDS['enum']; + } + + let typedocType = inferTypedocType(member.datatype); + + if (member.decorations?.some((d: { name: string }) => ['property', 'dualproperty'].includes(d.name))) { + typedocKind = TYPEDOC_KINDS['data']; + typedocType = inferTypedocType(member.return_type ?? member.datatype); + } + + if(parent.kindString === 'Enumeration') { + typedocKind = TYPEDOC_KINDS['enumValue']; + typedocType = { + type: 'literal', + value: member.value, + } + } + + if(member.type in TYPEDOC_KINDS && !isHidden(member)) { + // Get the URL of the member in GitHub + const repoBaseUrl = `${REPO_URL_PER_PACKAGE[rootModuleName as keyof typeof REPO_URL_PER_PACKAGE]}/blob/${TAG_PER_PACKAGE[rootModuleName] ?? 'master'}`; + const filePathInRepo = member.location.filename.replace(REPO_ROOT_PLACEHOLDER, ''); + const fileGitHubUrl = member.location.filename.replace(REPO_ROOT_PLACEHOLDER, repoBaseUrl); + const memberGitHubUrl = `${fileGitHubUrl}#L${member.location.lineno}`; + + symbolIdMap.push({ + qualifiedName: member.name, + sourceFileName: filePathInRepo, + }); + + // Get the module name of the member, and check if it has a shortcut (reexport from an ancestor module) + const fullName = `${module.name}.${member.name}`; + let moduleName = module.name; + if (fullName in MODULE_SHORTCUTS) { + moduleName = MODULE_SHORTCUTS[fullName].replace(`.${member.name}`, ''); + } + + // Create the Typedoc member object + let typedocMember = { + id: oid++, + name: member.name, + module: moduleName, // This is an extension to the original Typedoc structure, to support showing where the member is exported from + ...typedocKind, + flags: {}, + comment: member.docstring ? { + summary: [{ + kind: 'text', + text: member.docstring?.content, + }], + } : undefined, + type: typedocType, + children: [], + groups: [], + sources: [{ + filename: filePathInRepo, + line: member.location.lineno, + character: 1, + url: memberGitHubUrl, + }], + signatures: [] as any[], + }; + + if(typedocMember.kindString === 'Method') { + const { parameters, returns } = extractArgsAndReturns(member.docstring?.content ?? ''); + + typedocMember.signatures = [{ + id: oid++, + name: member.name, + modifiers: member.modifiers ?? [], + kind: 4096, + kindString: 'Call signature', + flags: {}, + comment: member.docstring ? { + summary: [{ + kind: 'text', + text: member.docstring?.content + .replace(/\**(Args|Arguments|Returns)[\s\S]+/, ''), + }], + blockTags: returns ? [ + { tag: '@returns', content: [{ kind: 'text', text: returns }] }, + ] : undefined, + } : undefined, + type: inferTypedocType(member.return_type), + parameters: member.args + .filter((arg: any) => (arg.name !== 'self' && arg.name !== 'cls')) + .map((arg: any) => ({ + id: oid++, + name: arg.name, + kind: 32768, + kindString: 'Parameter', + flags: { + isOptional: arg.datatype?.includes('Optional') ? 'true' : undefined, + 'keyword-only': arg.type === 'KEYWORD_ONLY' ? 'true' : undefined, + }, + type: inferTypedocType(arg.datatype), + comment: parameters[arg.name] ? { + summary: [{ + kind: 'text', + text: parameters[arg.name] + }] + } : undefined, + defaultValue: arg.default_value, + })), + }]; + } + + if(typedocMember.name === '__init__') { + typedocMember.kind = 512; + typedocMember.kindString = 'Constructor'; + } + + convertObject(member, typedocMember, module); + + const groupName = getGroupName(typedocMember); + + const group = parent.groups.find((g: { title: string }) => g.title === groupName); + if (group) { + group.children.push(typedocMember.id); + } else { + parent.groups.push({ + title: groupName, + children: [typedocMember.id], + }); + } + + sortChildren(typedocMember); + parent.children.push(typedocMember); + } + } +} + +export async function pydocToTypedoc({ + pydocFile, + pydocJson, + outFile, + moduleName, +}: { + pydocFile?: string, + pydocJson?: string, + outFile: string, + moduleName?: string, +}) { + await initPackageTags({ moduleName }); + await initModuleShortcuts(); + + // Root object of the Typedoc structure + const typedocApiReference = { + 'id': 0, + 'name': 'apify-client', + 'kind': 1, + 'kindString': 'Project', + 'flags': {}, + 'originalName': '', + 'children': [], + 'groups': [], + 'sources': [ + { + 'fileName': 'src/index.ts', + 'line': 1, + 'character': 0, + 'url': `http://example.com/blob/123456/src/dummy.py`, + } + ], + 'symbolIdMap': {}, + }; + + // Load the docspec dump files of this module and of apify-shared + const thisPackageDocspecDump = pydocJson ?? fs.readFileSync(pydocFile!, 'utf8'); + const thisPackageModules = thisPackageDocspecDump.split('\n').filter((line) => line !== ''); + + // Convert all the modules, store them in the root object + for (const module of [...thisPackageModules]) { + const parsedModule = JSON.parse(module); + convertObject(parsedModule, typedocApiReference, parsedModule); + }; + + // Recursively fix references (collect names->ids of all the named entities and then inject those in the reference objects) + const namesToIds: Record = {}; + function collectIds(obj: Record) { + for (const child of obj.children ?? []) { + namesToIds[child.name] = child.id; + collectIds(child); + } + } + collectIds(typedocApiReference); + + function fixRefs(obj: Record) { + for (const child of obj.children ?? []) { + if (child.type?.type === 'reference') { + child.type.id = namesToIds[child.type.name]; + } + if (child.signatures) { + for (const sig of child.signatures) { + for (const param of sig.parameters ?? []) { + if (param.type?.type === 'reference') { + param.type.id = namesToIds[param.type.name]; + } + } + if (sig.type?.type === 'reference') { + sig.type.id = namesToIds[sig.type.name]; + } + } + } + fixRefs(child); + } + } + fixRefs(typedocApiReference); + + // Sort the children of the root object + sortChildren(typedocApiReference); + + typedocApiReference.symbolIdMap = Object.fromEntries(Object.entries(symbolIdMap)); + + // Write the Typedoc structure to the output file + fs.writeFileSync(outFile, JSON.stringify(typedocApiReference, null, 4)); +} diff --git a/packages/plugin/src/types.ts b/packages/plugin/src/types.ts index 584ee73..2a240e9 100644 --- a/packages/plugin/src/types.ts +++ b/packages/plugin/src/types.ts @@ -30,6 +30,7 @@ export interface DocusaurusPluginTypeDocApiOptions sortSidebar?: (a: string, d: string) => number; tsconfigName?: string; typedocOptions?: Partial; + python: boolean; remarkPlugins: MDXPlugin[]; rehypePlugins: MDXPlugin[]; diff --git a/packages/plugin/tsconfig.json b/packages/plugin/tsconfig.json index 1fb46e6..533ebbc 100644 --- a/packages/plugin/tsconfig.json +++ b/packages/plugin/tsconfig.json @@ -3,5 +3,8 @@ "include": [ "src/**/*", "types/**/*" - ] + ], + "compilerOptions": { + "lib": ["es2021"], + } } diff --git a/yarn.lock b/yarn.lock index f5c3127..7c00c8a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -232,6 +232,7 @@ __metadata: react-dom: "npm:^18.2.0" typedoc: "npm:^0.25.7" typescript: "npm:^5.3.3" + zx: "npm:^8.1.4" peerDependencies: "@docusaurus/core": ^3.5.2 "@docusaurus/mdx-loader": ^3.5.2 @@ -4047,6 +4048,16 @@ __metadata: languageName: node linkType: hard +"@types/fs-extra@npm:>=11": + version: 11.0.4 + resolution: "@types/fs-extra@npm:11.0.4" + dependencies: + "@types/jsonfile": "npm:*" + "@types/node": "npm:*" + checksum: acc4c1eb0cde7b1f23f3fe6eb080a14832d8fa9dc1761aa444c5e2f0f6b6fa657ed46ebae32fb580a6700fc921b6165ce8ac3e3ba030c3dd15f10ad4dd4cae98 + languageName: node + linkType: hard + "@types/gtag.js@npm:^0.0.12": version: 0.0.12 resolution: "@types/gtag.js@npm:0.0.12" @@ -4139,6 +4150,15 @@ __metadata: languageName: node linkType: hard +"@types/jsonfile@npm:*": + version: 6.1.4 + resolution: "@types/jsonfile@npm:6.1.4" + dependencies: + "@types/node": "npm:*" + checksum: 309fda20eb5f1cf68f2df28931afdf189c5e7e6bec64ac783ce737bb98908d57f6f58757ad5da9be37b815645a6f914e2d4f3ac66c574b8fe1ba6616284d0e97 + languageName: node + linkType: hard + "@types/marked@npm:^6.0.0": version: 6.0.0 resolution: "@types/marked@npm:6.0.0" @@ -4201,7 +4221,7 @@ __metadata: languageName: node linkType: hard -"@types/node@npm:*": +"@types/node@npm:*, @types/node@npm:>=20": version: 22.5.0 resolution: "@types/node@npm:22.5.0" dependencies: @@ -18797,3 +18817,20 @@ __metadata: checksum: f22ec5fc2d5f02c423c93d35cdfa83573a3a3bd98c66b927c368ea4d0e7252a500df2a90a6b45522be536a96a73404393c958e945fdba95e6832c200791702b6 languageName: node linkType: hard + +"zx@npm:^8.1.4": + version: 8.1.4 + resolution: "zx@npm:8.1.4" + dependencies: + "@types/fs-extra": "npm:>=11" + "@types/node": "npm:>=20" + dependenciesMeta: + "@types/fs-extra": + optional: true + "@types/node": + optional: true + bin: + zx: build/cli.js + checksum: 1ffa4c51a1edad25de0729d09667b3d1b7b4f9c8f6b4300e34d85f8f18c2e768f7e297b9bfad4d3b8a24792b3f14085f229933d0a224febba49ac2588ed155b1 + languageName: node + linkType: hard