-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
566 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
[package] | ||
name = "docgen" | ||
version.workspace = true | ||
edition.workspace = true | ||
|
||
[dependencies] | ||
logutil = { path = '../logutil' } | ||
rayexec_error = { path = '../rayexec_error' } | ||
rayexec_execution = { path = '../rayexec_execution' } | ||
rayexec_server = { path = '../rayexec_server' } | ||
rayexec_shell = { path = '../rayexec_shell' } | ||
rayexec_rt_native = { path = '../rayexec_rt_native' } | ||
rayexec_bullet = { path = '../rayexec_bullet' } | ||
rayexec_postgres = { path = '../rayexec_postgres' } | ||
rayexec_parquet = { path = '../rayexec_parquet' } | ||
rayexec_csv = { path = '../rayexec_csv' } | ||
rayexec_delta = { path = '../rayexec_delta' } | ||
rayexec_unity_catalog = { path = '../rayexec_unity_catalog' } | ||
rayexec_iceberg = { path = '../rayexec_iceberg' } | ||
rayexec_debug = { path = '../rayexec_debug' } | ||
regex = { workspace = true } | ||
tracing = { workspace = true } | ||
tokio = { workspace = true, default-features = false, features = ["rt", "rt-multi-thread", "time", "net"] } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
use std::fmt::Write as _; | ||
use std::fs; | ||
use std::io::{BufRead, BufReader, BufWriter, Write}; | ||
use std::sync::LazyLock; | ||
|
||
use rayexec_error::{RayexecError, Result}; | ||
use regex::Regex; | ||
use tracing::info; | ||
|
||
use crate::section::SectionWriter; | ||
use crate::session::DocsSession; | ||
|
||
static DOCSGEN_START_REGEX: LazyLock<Regex> = | ||
LazyLock::new(|| Regex::new(r"<!--\s*DOCSGEN_START\s+([a-zA-Z0-9_]+)\s*-->").unwrap()); | ||
|
||
static DOCSGEN_END_REGEX: LazyLock<Regex> = | ||
LazyLock::new(|| Regex::new(r"<!--\s*DOCSGEN_END\s*-->").unwrap()); | ||
|
||
fn expand_path(path: &str) -> String { | ||
format!("{}/../../{}", env!("CARGO_MANIFEST_DIR"), path) | ||
} | ||
|
||
#[derive(Debug)] | ||
pub struct DocFile { | ||
pub path: &'static str, | ||
pub sections: &'static [(&'static str, &'static dyn SectionWriter)], | ||
} | ||
|
||
impl DocFile { | ||
pub fn overwrite(&self, session: &DocsSession) -> Result<()> { | ||
let path = expand_path(self.path); | ||
info!(%path, "expanded path"); | ||
|
||
let file = fs::OpenOptions::new().read(true).write(true).open(&path)?; | ||
|
||
let lines: Vec<String> = BufReader::new(&file).lines().collect::<Result<_, _>>()?; | ||
|
||
// Write to buffer instead of file directly in case we error early. | ||
let mut buf = String::new(); | ||
|
||
let mut in_docsgen_section = false; | ||
|
||
for (idx, line) in lines.iter().enumerate() { | ||
match DOCSGEN_START_REGEX.captures(line) { | ||
Some(captures) => { | ||
if in_docsgen_section { | ||
return Err(RayexecError::new("Cannot nest docsgen sections") | ||
.with_field("line_number", idx + 1)); | ||
} | ||
in_docsgen_section = true; | ||
|
||
let section_name = captures.get(1).unwrap().as_str(); | ||
|
||
let section = self | ||
.sections | ||
.iter() | ||
.find_map(|(name, section)| { | ||
if *name == section_name { | ||
Some(section) | ||
} else { | ||
None | ||
} | ||
}) | ||
.ok_or_else(|| { | ||
RayexecError::new(format!("Missing docs section: {section_name}")) | ||
})?; | ||
|
||
// Write original line + extra newline | ||
writeln!(buf, "{}", line)?; | ||
writeln!(buf)?; | ||
|
||
// Write out section. | ||
section.write(session, &mut buf)?; | ||
} | ||
None => { | ||
if DOCSGEN_END_REGEX.is_match(line.as_str()) { | ||
if !in_docsgen_section { | ||
return Err(RayexecError::new( | ||
"Found DOCSGEN_END tag when not in a docsgen section", | ||
) | ||
.with_field("line_number", idx + 1)); | ||
} | ||
|
||
in_docsgen_section = false; | ||
|
||
// Write extra newline + original line | ||
writeln!(buf)?; | ||
writeln!(buf, "{}", line)?; | ||
} else { | ||
// Only write out stuff outside of the docgen section. | ||
// We already wrote the new output, so we need to | ||
// discard the old stuff. | ||
if !in_docsgen_section { | ||
writeln!(buf, "{}", line)?; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
if in_docsgen_section { | ||
return Err(RayexecError::new( | ||
"Reached end of file, still in docsgen section", | ||
)); | ||
} | ||
|
||
let file = fs::OpenOptions::new() | ||
.write(true) | ||
.truncate(true) | ||
.open(&path)?; | ||
|
||
let mut writer = BufWriter::new(file); | ||
writer.write_all(buf.as_bytes())?; | ||
writer.flush()?; | ||
|
||
Ok(()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
mod file; | ||
mod markdown_table; | ||
mod section; | ||
mod session; | ||
|
||
use file::DocFile; | ||
use rayexec_csv::CsvDataSource; | ||
use rayexec_delta::DeltaDataSource; | ||
use rayexec_error::Result; | ||
use rayexec_execution::datasource::{DataSourceBuilder, DataSourceRegistry, MemoryDataSource}; | ||
use rayexec_iceberg::IcebergDataSource; | ||
use rayexec_parquet::ParquetDataSource; | ||
use rayexec_postgres::PostgresDataSource; | ||
use rayexec_rt_native::runtime::{NativeRuntime, ThreadedNativeExecutor}; | ||
use rayexec_shell::session::SingleUserEngine; | ||
use rayexec_unity_catalog::UnityCatalogDataSource; | ||
use section::{AggregateFunctionWriter, ScalarFunctionWriter, TableFunctionWriter}; | ||
use session::DocsSession; | ||
use tracing::info; | ||
|
||
const FILES: &[DocFile] = &[DocFile { | ||
path: "docs/sql/functions.md", | ||
sections: &[ | ||
("scalar_functions", &ScalarFunctionWriter), | ||
("aggregate_functions", &AggregateFunctionWriter), | ||
("table_functions", &TableFunctionWriter), | ||
], | ||
}]; | ||
|
||
fn main() -> Result<()> { | ||
logutil::configure_global_logger(tracing::Level::INFO, logutil::LogFormat::HumanReadable); | ||
|
||
info!("starting docs gen"); | ||
|
||
let executor = ThreadedNativeExecutor::try_new().unwrap(); | ||
let runtime = NativeRuntime::with_default_tokio().unwrap(); | ||
|
||
let registry = DataSourceRegistry::default() | ||
.with_datasource("memory", Box::new(MemoryDataSource))? | ||
.with_datasource("postgres", PostgresDataSource::initialize(runtime.clone()))? | ||
.with_datasource("delta", DeltaDataSource::initialize(runtime.clone()))? | ||
.with_datasource("unity", UnityCatalogDataSource::initialize(runtime.clone()))? | ||
.with_datasource("parquet", ParquetDataSource::initialize(runtime.clone()))? | ||
.with_datasource("csv", CsvDataSource::initialize(runtime.clone()))? | ||
.with_datasource("iceberg", IcebergDataSource::initialize(runtime.clone()))?; | ||
let engine = SingleUserEngine::try_new(executor, runtime, registry)?; | ||
let session = DocsSession { engine }; | ||
|
||
for file in FILES { | ||
info!(%file.path, "handing file"); | ||
file.overwrite(&session)?; | ||
} | ||
|
||
info!("completed all files"); | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
use std::fmt; | ||
|
||
use rayexec_bullet::batch::Batch; | ||
use rayexec_bullet::field::Schema; | ||
use rayexec_bullet::format::{FormatOptions, Formatter}; | ||
use rayexec_error::Result; | ||
|
||
const FORMATTER: Formatter = Formatter::new(FormatOptions { | ||
null: "", | ||
empty_string: "", | ||
}); | ||
|
||
pub fn write_markdown_table<'a>( | ||
output: &mut dyn fmt::Write, | ||
schema: &Schema, | ||
batches: impl IntoIterator<Item = &'a Batch>, | ||
) -> Result<()> { | ||
// 'field1 | field2 | field3' | ||
let header = schema | ||
.fields | ||
.iter() | ||
.map(|f| f.name.clone()) | ||
.collect::<Vec<_>>() | ||
.join(" | "); | ||
|
||
writeln!(output, "| {header} |")?; | ||
|
||
// ' --- | --- | ---' | ||
let sep = schema | ||
.fields | ||
.iter() | ||
.map(|_| "---") | ||
.collect::<Vec<_>>() | ||
.join(" | "); | ||
|
||
writeln!(output, "| {sep} |")?; | ||
|
||
for batch in batches { | ||
for row in 0..batch.num_rows() { | ||
for (idx, column) in batch.columns().iter().enumerate() { | ||
if idx == 0 { | ||
write!(output, "|")?; | ||
} | ||
|
||
let val = FORMATTER.format_array_value(column, row)?; | ||
write!(output, " {val} |")?; | ||
} | ||
writeln!(output)?; | ||
} | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use rayexec_bullet::array::Array;
    use rayexec_bullet::datatype::DataType;
    use rayexec_bullet::field::Field;

    use super::*;

    /// A two-column, three-row batch renders as a header row, a separator row
    /// and one line per data row.
    #[test]
    fn simple() {
        let schema = Schema::new([
            Field::new("Numbers", DataType::Int32, false),
            Field::new("Strings", DataType::Utf8, false),
        ]);

        let numbers = Array::from_iter([1, 2, 3]);
        let strings = Array::from_iter(["cat", "dog", "mouse"]);
        let batch = Batch::try_new([numbers, strings]).unwrap();

        let mut rendered = String::new();
        write_markdown_table(&mut rendered, &schema, [&batch]).unwrap();

        let expected = r#"| Numbers | Strings |
| --- | --- |
| 1 | cat |
| 2 | dog |
| 3 | mouse |
"#;

        assert_eq!(expected, rendered);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
use std::fmt::{self, Debug}; | ||
|
||
use rayexec_error::Result; | ||
|
||
use crate::markdown_table::write_markdown_table; | ||
use crate::session::DocsSession; | ||
|
||
pub trait SectionWriter: Debug { | ||
fn write(&self, session: &DocsSession, output: &mut dyn fmt::Write) -> Result<()>; | ||
} | ||
|
||
const SCALAR_FUNCTIONS_QUERY: &str = r#" | ||
SELECT | ||
function_name as "Function name", | ||
description as "Description" | ||
FROM list_functions() | ||
WHERE function_type = 'scalar' | ||
GROUP BY "Function name", "Description" | ||
ORDER BY "Function name"; | ||
"#; | ||
|
||
#[derive(Debug)] | ||
pub struct ScalarFunctionWriter; | ||
|
||
impl SectionWriter for ScalarFunctionWriter { | ||
fn write(&self, session: &DocsSession, output: &mut dyn fmt::Write) -> Result<()> { | ||
let table = session.query(SCALAR_FUNCTIONS_QUERY)?; | ||
write_markdown_table(output, table.schema(), table.iter_batches()) | ||
} | ||
} | ||
|
||
const AGGREGATE_FUNCTIONS_QUERY: &str = r#" | ||
SELECT | ||
function_name as "Function name", | ||
description as "Description" | ||
FROM list_functions() | ||
WHERE function_type = 'aggregate' | ||
GROUP BY "Function name", "Description" | ||
ORDER BY "Function name"; | ||
"#; | ||
|
||
#[derive(Debug)] | ||
pub struct AggregateFunctionWriter; | ||
|
||
impl SectionWriter for AggregateFunctionWriter { | ||
fn write(&self, session: &DocsSession, output: &mut dyn fmt::Write) -> Result<()> { | ||
let table = session.query(AGGREGATE_FUNCTIONS_QUERY)?; | ||
write_markdown_table(output, table.schema(), table.iter_batches()) | ||
} | ||
} | ||
|
||
const TABLE_FUNCTIONS_QUERY: &str = r#" | ||
SELECT | ||
function_name as "Function name", | ||
description as "Description" | ||
FROM list_functions() | ||
WHERE function_type = 'table' | ||
GROUP BY "Function name", "Description" | ||
ORDER BY "Function name"; | ||
"#; | ||
|
||
#[derive(Debug)] | ||
pub struct TableFunctionWriter; | ||
|
||
impl SectionWriter for TableFunctionWriter { | ||
fn write(&self, session: &DocsSession, output: &mut dyn fmt::Write) -> Result<()> { | ||
let table = session.query(TABLE_FUNCTIONS_QUERY)?; | ||
write_markdown_table(output, table.schema(), table.iter_batches()) | ||
} | ||
} |
Oops, something went wrong.