Skip to content

Commit

Permalink
feat: Initial docsgen crate (#3368)
Browse files Browse the repository at this point in the history
  • Loading branch information
scsmithr authored Dec 18, 2024
1 parent 5cb5ef9 commit 626d9f9
Show file tree
Hide file tree
Showing 8 changed files with 566 additions and 0 deletions.
23 changes: 23 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions crates/docgen/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Internal tool crate: regenerates documentation sections (e.g. the SQL
# function reference) by querying a live in-process engine session.
[package]
name = "docgen"
version.workspace = true
edition.workspace = true

[dependencies]
# Shared logging configuration used by this binary's main().
logutil = { path = '../logutil' }
rayexec_error = { path = '../rayexec_error' }
rayexec_execution = { path = '../rayexec_execution' }
rayexec_server = { path = '../rayexec_server' }
rayexec_shell = { path = '../rayexec_shell' }
rayexec_rt_native = { path = '../rayexec_rt_native' }
rayexec_bullet = { path = '../rayexec_bullet' }
# Data source crates: each is registered at startup so that generated
# function listings reflect everything a user-facing build would expose.
rayexec_postgres = { path = '../rayexec_postgres' }
rayexec_parquet = { path = '../rayexec_parquet' }
rayexec_csv = { path = '../rayexec_csv' }
rayexec_delta = { path = '../rayexec_delta' }
rayexec_unity_catalog = { path = '../rayexec_unity_catalog' }
rayexec_iceberg = { path = '../rayexec_iceberg' }
rayexec_debug = { path = '../rayexec_debug' }
# Used to locate DOCSGEN_START/DOCSGEN_END markers in doc files.
regex = { workspace = true }
tracing = { workspace = true }
tokio = { workspace = true, default-features = false, features = ["rt", "rt-multi-thread", "time", "net"] }
118 changes: 118 additions & 0 deletions crates/docgen/src/file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
use std::fmt::Write as _;
use std::fs;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::sync::LazyLock;

use rayexec_error::{RayexecError, Result};
use regex::Regex;
use tracing::info;

use crate::section::SectionWriter;
use crate::session::DocsSession;

// Matches a section-opening marker like `<!-- DOCSGEN_START my_section -->`.
// Capture group 1 is the section name, restricted to [a-zA-Z0-9_].
static DOCSGEN_START_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"<!--\s*DOCSGEN_START\s+([a-zA-Z0-9_]+)\s*-->").unwrap());

// Matches the (unnamed) section-closing marker `<!-- DOCSGEN_END -->`.
static DOCSGEN_END_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"<!--\s*DOCSGEN_END\s*-->").unwrap());

/// Expand a repo-root-relative path into a usable filesystem path, assuming
/// this crate's manifest lives two directories below the repository root.
fn expand_path(path: &str) -> String {
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let mut expanded = String::with_capacity(manifest_dir.len() + "/../../".len() + path.len());
    expanded.push_str(manifest_dir);
    expanded.push_str("/../../");
    expanded.push_str(path);
    expanded
}

/// A documentation file on disk containing one or more generated sections.
#[derive(Debug)]
pub struct DocFile {
    /// Path to the file, relative to the repository root (see `expand_path`).
    pub path: &'static str,
    /// Section writers keyed by name; the name must match the identifier in
    /// the file's `<!-- DOCSGEN_START <name> -->` marker.
    pub sections: &'static [(&'static str, &'static dyn SectionWriter)],
}

impl DocFile {
    /// Regenerate all docsgen sections in this file, rewriting it in place.
    ///
    /// Reads the file line by line, preserving everything outside of
    /// `DOCSGEN_START`/`DOCSGEN_END` marker pairs verbatim, and replacing the
    /// content between each pair with freshly generated output from the
    /// section writer whose name matches the start marker.
    ///
    /// Errors if sections are nested, an end marker appears outside a
    /// section, the file ends inside a section, or a start marker names a
    /// section not present in `self.sections`. On error the file is left
    /// untouched, since output is staged in an in-memory buffer first.
    pub fn overwrite(&self, session: &DocsSession) -> Result<()> {
        let path = expand_path(self.path);
        info!(%path, "expanded path");

        // Read-only handle for the initial pass. (Write access isn't needed
        // here; we reopen for writing only after generation succeeds.)
        let file = fs::OpenOptions::new().read(true).open(&path)?;

        let lines: Vec<String> = BufReader::new(&file).lines().collect::<Result<_, _>>()?;

        // Write to buffer instead of file directly in case we error early.
        let mut buf = String::new();

        let mut in_docsgen_section = false;

        for (idx, line) in lines.iter().enumerate() {
            match DOCSGEN_START_REGEX.captures(line) {
                Some(captures) => {
                    if in_docsgen_section {
                        return Err(RayexecError::new("Cannot nest docsgen sections")
                            .with_field("line_number", idx + 1));
                    }
                    in_docsgen_section = true;

                    // Group 1 always exists when the start regex matches.
                    let section_name = captures.get(1).unwrap().as_str();

                    let section = self
                        .sections
                        .iter()
                        .find_map(|(name, section)| {
                            if *name == section_name {
                                Some(section)
                            } else {
                                None
                            }
                        })
                        .ok_or_else(|| {
                            RayexecError::new(format!("Missing docs section: {section_name}"))
                        })?;

                    // Write original line + extra newline.
                    writeln!(buf, "{}", line)?;
                    writeln!(buf)?;

                    // Write out newly generated section content.
                    section.write(session, &mut buf)?;
                }
                None => {
                    if DOCSGEN_END_REGEX.is_match(line.as_str()) {
                        if !in_docsgen_section {
                            return Err(RayexecError::new(
                                "Found DOCSGEN_END tag when not in a docsgen section",
                            )
                            .with_field("line_number", idx + 1));
                        }

                        in_docsgen_section = false;

                        // Write extra newline + original line.
                        writeln!(buf)?;
                        writeln!(buf, "{}", line)?;
                    } else {
                        // Only write out stuff outside of the docgen section.
                        // We already wrote the new output, so we need to
                        // discard the old stuff.
                        if !in_docsgen_section {
                            writeln!(buf, "{}", line)?;
                        }
                    }
                }
            }
        }

        if in_docsgen_section {
            return Err(RayexecError::new(
                "Reached end of file, still in docsgen section",
            ));
        }

        // Generation succeeded; truncate the file and write the new content.
        let file = fs::OpenOptions::new()
            .write(true)
            .truncate(true)
            .open(&path)?;

        let mut writer = BufWriter::new(file);
        writer.write_all(buf.as_bytes())?;
        writer.flush()?;

        Ok(())
    }
}
57 changes: 57 additions & 0 deletions crates/docgen/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
mod file;
mod markdown_table;
mod section;
mod session;

use file::DocFile;
use rayexec_csv::CsvDataSource;
use rayexec_delta::DeltaDataSource;
use rayexec_error::Result;
use rayexec_execution::datasource::{DataSourceBuilder, DataSourceRegistry, MemoryDataSource};
use rayexec_iceberg::IcebergDataSource;
use rayexec_parquet::ParquetDataSource;
use rayexec_postgres::PostgresDataSource;
use rayexec_rt_native::runtime::{NativeRuntime, ThreadedNativeExecutor};
use rayexec_shell::session::SingleUserEngine;
use rayexec_unity_catalog::UnityCatalogDataSource;
use section::{AggregateFunctionWriter, ScalarFunctionWriter, TableFunctionWriter};
use session::DocsSession;
use tracing::info;

/// All documentation files to regenerate, with the section writers that
/// produce each named docsgen section within the file.
const FILES: &[DocFile] = &[DocFile {
    path: "docs/sql/functions.md",
    sections: &[
        ("scalar_functions", &ScalarFunctionWriter),
        ("aggregate_functions", &AggregateFunctionWriter),
        ("table_functions", &TableFunctionWriter),
    ],
}];

/// Entry point: builds an in-process engine with all data sources registered,
/// then regenerates every docsgen section listed in [`FILES`].
fn main() -> Result<()> {
    logutil::configure_global_logger(tracing::Level::INFO, logutil::LogFormat::HumanReadable);

    info!("starting docs gen");

    // Propagate setup failures via `?` instead of panicking — main already
    // returns Result.
    let executor = ThreadedNativeExecutor::try_new()?;
    let runtime = NativeRuntime::with_default_tokio()?;

    // Register every data source so generated listings (e.g. list_functions())
    // reflect the full set of functions a user-facing build exposes.
    let registry = DataSourceRegistry::default()
        .with_datasource("memory", Box::new(MemoryDataSource))?
        .with_datasource("postgres", PostgresDataSource::initialize(runtime.clone()))?
        .with_datasource("delta", DeltaDataSource::initialize(runtime.clone()))?
        .with_datasource("unity", UnityCatalogDataSource::initialize(runtime.clone()))?
        .with_datasource("parquet", ParquetDataSource::initialize(runtime.clone()))?
        .with_datasource("csv", CsvDataSource::initialize(runtime.clone()))?
        .with_datasource("iceberg", IcebergDataSource::initialize(runtime.clone()))?;
    let engine = SingleUserEngine::try_new(executor, runtime, registry)?;
    let session = DocsSession { engine };

    for file in FILES {
        // Fixed typo: "handing" -> "handling".
        info!(%file.path, "handling file");
        file.overwrite(&session)?;
    }

    info!("completed all files");

    Ok(())
}
89 changes: 89 additions & 0 deletions crates/docgen/src/markdown_table.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
use std::fmt;

use rayexec_bullet::batch::Batch;
use rayexec_bullet::field::Schema;
use rayexec_bullet::format::{FormatOptions, Formatter};
use rayexec_error::Result;

/// Formatter for table cells: render NULL values and empty strings as empty
/// markdown cells rather than placeholder text.
const FORMATTER: Formatter = Formatter::new(FormatOptions {
    null: "",
    empty_string: "",
});

/// Write `batches` to `output` as a markdown table.
///
/// The header row is built from `schema` field names, followed by the `---`
/// separator row, then one table row per batch row. Cell values are rendered
/// with [`FORMATTER`] (NULL/empty rendered as empty cells).
pub fn write_markdown_table<'a>(
    output: &mut dyn fmt::Write,
    schema: &Schema,
    batches: impl IntoIterator<Item = &'a Batch>,
) -> Result<()> {
    // Header row: '| field1 | field2 | field3 |'
    let names: Vec<String> = schema.fields.iter().map(|f| f.name.clone()).collect();
    writeln!(output, "| {} |", names.join(" | "))?;

    // Separator row: '| --- | --- | --- |'
    let dashes = vec!["---"; schema.fields.len()];
    writeln!(output, "| {} |", dashes.join(" | "))?;

    // One markdown row per data row, across all batches in order.
    for batch in batches {
        for row_idx in 0..batch.num_rows() {
            for (col_idx, column) in batch.columns().iter().enumerate() {
                // Leading pipe only once per row, and only if there are columns.
                if col_idx == 0 {
                    write!(output, "|")?;
                }
                let cell = FORMATTER.format_array_value(column, row_idx)?;
                write!(output, " {cell} |")?;
            }
            writeln!(output)?;
        }
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use rayexec_bullet::array::Array;
    use rayexec_bullet::datatype::DataType;
    use rayexec_bullet::field::Field;

    use super::*;

    /// Renders a two-column batch and checks the exact markdown output,
    /// including header, separator row, and trailing newline.
    #[test]
    fn simple() {
        let batch = Batch::try_new([
            Array::from_iter([1, 2, 3]),
            Array::from_iter(["cat", "dog", "mouse"]),
        ])
        .unwrap();

        let schema = Schema::new([
            Field::new("Numbers", DataType::Int32, false),
            Field::new("Strings", DataType::Utf8, false),
        ]);

        let mut buf = String::new();

        write_markdown_table(&mut buf, &schema, [&batch]).unwrap();

        let expected = r#"| Numbers | Strings |
| --- | --- |
| 1 | cat |
| 2 | dog |
| 3 | mouse |
"#;

        assert_eq!(expected, buf);
    }
}
70 changes: 70 additions & 0 deletions crates/docgen/src/section.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
use std::fmt::{self, Debug};

use rayexec_error::Result;

use crate::markdown_table::write_markdown_table;
use crate::session::DocsSession;

/// Generator for one named documentation section.
pub trait SectionWriter: Debug {
    /// Write this section's markdown content to `output`, querying `session`
    /// as needed.
    fn write(&self, session: &DocsSession, output: &mut dyn fmt::Write) -> Result<()>;
}

/// Lists scalar function names and descriptions, deduplicated via GROUP BY
/// and sorted by name for stable doc output.
const SCALAR_FUNCTIONS_QUERY: &str = r#"
SELECT
    function_name as "Function name",
    description as "Description"
FROM list_functions()
WHERE function_type = 'scalar'
GROUP BY "Function name", "Description"
ORDER BY "Function name";
"#;

/// Writes the scalar function reference table.
#[derive(Debug)]
pub struct ScalarFunctionWriter;

impl SectionWriter for ScalarFunctionWriter {
    fn write(&self, session: &DocsSession, output: &mut dyn fmt::Write) -> Result<()> {
        session.query(SCALAR_FUNCTIONS_QUERY).and_then(|materialized| {
            write_markdown_table(output, materialized.schema(), materialized.iter_batches())
        })
    }
}

/// Lists aggregate function names and descriptions, deduplicated via GROUP BY
/// and sorted by name for stable doc output.
const AGGREGATE_FUNCTIONS_QUERY: &str = r#"
SELECT
    function_name as "Function name",
    description as "Description"
FROM list_functions()
WHERE function_type = 'aggregate'
GROUP BY "Function name", "Description"
ORDER BY "Function name";
"#;

/// Writes the aggregate function reference table.
#[derive(Debug)]
pub struct AggregateFunctionWriter;

impl SectionWriter for AggregateFunctionWriter {
    fn write(&self, session: &DocsSession, output: &mut dyn fmt::Write) -> Result<()> {
        session.query(AGGREGATE_FUNCTIONS_QUERY).and_then(|materialized| {
            write_markdown_table(output, materialized.schema(), materialized.iter_batches())
        })
    }
}

/// Lists table function names and descriptions, deduplicated via GROUP BY
/// and sorted by name for stable doc output.
const TABLE_FUNCTIONS_QUERY: &str = r#"
SELECT
    function_name as "Function name",
    description as "Description"
FROM list_functions()
WHERE function_type = 'table'
GROUP BY "Function name", "Description"
ORDER BY "Function name";
"#;

/// Writes the table function reference table.
#[derive(Debug)]
pub struct TableFunctionWriter;

impl SectionWriter for TableFunctionWriter {
    fn write(&self, session: &DocsSession, output: &mut dyn fmt::Write) -> Result<()> {
        session.query(TABLE_FUNCTIONS_QUERY).and_then(|materialized| {
            write_markdown_table(output, materialized.schema(), materialized.iter_batches())
        })
    }
}
Loading

0 comments on commit 626d9f9

Please sign in to comment.