diff --git a/docs/src/pages/img/papers/auto-compartmentalization-src.png b/docs/src/pages/img/papers/auto-compartmentalization-src.png index 8785f09babac..042616266336 100644 Binary files a/docs/src/pages/img/papers/auto-compartmentalization-src.png and b/docs/src/pages/img/papers/auto-compartmentalization-src.png differ diff --git a/docs/src/pages/img/papers/conor-papoc-2024.png b/docs/src/pages/img/papers/conor-papoc-2024.png index f0c3ce84da1e..83b1fb2a0998 100644 Binary files a/docs/src/pages/img/papers/conor-papoc-2024.png and b/docs/src/pages/img/papers/conor-papoc-2024.png differ diff --git a/docs/src/pages/img/papers/david-papoc-2024.png b/docs/src/pages/img/papers/david-papoc-2024.png index c73169aa34e8..da31070eb398 100644 Binary files a/docs/src/pages/img/papers/david-papoc-2024.png and b/docs/src/pages/img/papers/david-papoc-2024.png differ diff --git a/docs/src/pages/img/papers/david-sigmod-2024.png b/docs/src/pages/img/papers/david-sigmod-2024.png index f6751f75f505..ec1a62174cfb 100644 Binary files a/docs/src/pages/img/papers/david-sigmod-2024.png and b/docs/src/pages/img/papers/david-sigmod-2024.png differ diff --git a/docs/src/pages/img/papers/flo.png b/docs/src/pages/img/papers/flo.png new file mode 100644 index 000000000000..0bfb73dd8ded Binary files /dev/null and b/docs/src/pages/img/papers/flo.png differ diff --git a/docs/src/pages/img/papers/hydroflow-thesis.png b/docs/src/pages/img/papers/hydroflow-thesis.png index 40515491a2c3..4a11caf97105 100644 Binary files a/docs/src/pages/img/papers/hydroflow-thesis.png and b/docs/src/pages/img/papers/hydroflow-thesis.png differ diff --git a/docs/src/pages/img/papers/joe-applied-2023.png b/docs/src/pages/img/papers/joe-applied-2023.png index 8c2d958ec16b..9c829d5ded02 100644 Binary files a/docs/src/pages/img/papers/joe-applied-2023.png and b/docs/src/pages/img/papers/joe-applied-2023.png differ diff --git a/docs/src/pages/img/papers/katara.png b/docs/src/pages/img/papers/katara.png index eea111ce35ee..814bdffb83cd 100644 Binary files a/docs/src/pages/img/papers/katara.png and b/docs/src/pages/img/papers/katara.png differ diff --git a/docs/src/pages/img/papers/keep-calm-and-crdt-on.png b/docs/src/pages/img/papers/keep-calm-and-crdt-on.png index 714e5c493656..5edf3b579d1b 100644 Binary files a/docs/src/pages/img/papers/keep-calm-and-crdt-on.png and b/docs/src/pages/img/papers/keep-calm-and-crdt-on.png differ diff --git a/docs/src/pages/img/papers/new-directions.png b/docs/src/pages/img/papers/new-directions.png index a6d419e779d1..7e325e145346 100644 Binary files a/docs/src/pages/img/papers/new-directions.png and b/docs/src/pages/img/papers/new-directions.png differ diff --git a/docs/src/pages/img/papers/suki.png b/docs/src/pages/img/papers/suki.png new file mode 100644 index 000000000000..b3db39c9ead6 Binary files /dev/null and b/docs/src/pages/img/papers/suki.png differ diff --git a/docs/src/pages/img/papers/tiemo-cidr-2024.png b/docs/src/pages/img/papers/tiemo-cidr-2024.png index 0c0a9378f96d..4188c96c3c17 100644 Binary files a/docs/src/pages/img/papers/tiemo-cidr-2024.png and b/docs/src/pages/img/papers/tiemo-cidr-2024.png differ diff --git a/docs/src/pages/img/papers/tiemo-sigmod-2024.png b/docs/src/pages/img/papers/tiemo-sigmod-2024.png index bbddb6449653..7d519860207c 100644 Binary files a/docs/src/pages/img/papers/tiemo-sigmod-2024.png and b/docs/src/pages/img/papers/tiemo-sigmod-2024.png differ diff --git a/docs/src/pages/research.js b/docs/src/pages/research.js index 6c01039f536b..5270d26891d3 100644 --- a/docs/src/pages/research.js +++ b/docs/src/pages/research.js @@ -6,13 +6,26 @@ import Image from '@theme/IdealImage'; import styles from './research.module.css'; const papers = [ + { + title: "Flo: a Semantic Foundation for Progressive Stream Processing", + pdf: "pathname:///papers/flo.pdf", + thumb: require("./img/papers/flo.png"), + authors: <>Shadaj Laddad, Alvin Cheung, Joseph M. Hellerstein, Mae Milano, + description: [ + <>Existing streaming languages have a variety of semantic models and guarantees that are often incompatible. In this paper, we identify two general yet precise semantic properties: streaming progress and eager execution. We formally define these properties in the context of Flo, a parameterized streaming language that abstracts over dataflow operators and the underlying structure of streams., + <>To demonstrate the generality of our properties, we show how key ideas from representative streaming and incremental computation systems—Flink, LVars, and DBSP—have semantics that can be modeled in Flo and guarantees that map to our properties. + ], + conf: "POPL 2025", + links: <>PDF / arXiv + }, { title: "Optimizing Distributed Protocols with Query Rewrites", pdf: "pathname:///papers/david-sigmod-2024.pdf", thumb: require("./img/papers/david-sigmod-2024.png"), authors: <>David Chu, Rithvik Panchapakesan, Shadaj Laddad, Lucky Katahanas, Chris Liu, Kaushik Shivakumar, Natacha Crooks, Joseph M. Hellerstein, & Heidi Howard, description: [ - <>Distributed protocols such as 2PC and Paxos lie at the core of many systems in the cloud, but standard implementations do not scale. New scalable distributed protocols are developed through careful analysis and rewrites, but this process is ad hoc and error-prone. This paper presents an approach for scaling any distributed protocol by applying rule-driven rewrites, borrowing from query optimization. Distributed protocol rewrites entail a new burden: reasoning about spatiotemporal correctness. We leverage order-insensitivity and data dependency analysis to systematically identify correct coordination-free scaling opportunities. We apply this analysis to create preconditions and mechanisms for coordination-free decoupling and partitioning, two fundamental vertical and horizontal scaling techniques. Manual rule-driven applications of decoupling and partitioning improve the throughput of 2PC by 5x and Paxos by 3x, and match state-of-the-art throughput in recent work. These results point the way toward automated optimizers for distributed protocols based on correct-by-construction rewrite rules. + <>Distributed protocols such as 2PC and Paxos lie at the core of many systems in the cloud, but standard implementations do not scale. New scalable distributed protocols are developed through careful analysis and rewrites, but this process is ad hoc and error-prone. This paper presents an approach for scaling any distributed protocol by applying rule-driven rewrites, borrowing from query optimization., + <>Distributed protocol rewrites entail a new burden: reasoning about spatiotemporal correctness. We leverage order-insensitivity and data dependency analysis to systematically identify correct coordination-free scaling opportunities. We apply this analysis to create preconditions and mechanisms for coordination-free decoupling and partitioning, two fundamental vertical and horizontal scaling techniques. Manual rule-driven applications of decoupling and partitioning improve the throughput of 2PC by 5x and Paxos by 3x, and match state-of-the-art throughput in recent work. These results point the way toward automated optimizers for distributed protocols based on correct-by-construction rewrite rules. ], conf: "SIGMOD 2024", links: <>PDF / Tech Report / GitHub @@ -23,11 +36,24 @@ const papers = [ thumb: require("./img/papers/tiemo-sigmod-2024.png"), authors: <>Tiemo Bang, Chris Douglas, Natacha Crooks and Joseph M. Hellerstein, description: [ - <>Cloud object stores offer vastly different price points for object storage as a function of workload and geography. Poor object placement can thus lead to significant cost overheads. Prior cost-saving techniques attempt to optimize placement policies on the fly, deciding object placements for each object individually. In practice, these techniques do not scale to the size of the modern cloud. In this work, we leverage the static nature and pay-per-use pricing model of cloud environments to explore a different approach. Rather than computing object placements on the fly, we precompute a SkyPIE oracle---a lookup structure representing all possible placement policies and the workloads for which they are optimal. Internally, SkyPIE represents placement policies as a matrix of cost-hyperplanes, which we effectively precompute through pruning and convex optimization. By leveraging a fast geometric algorithm, online queries then are 1 to 8 orders of magnitude faster but as accurate as Integer-Linear-Programming. This makes exact optimization tractable for real workloads and we show >10x cost savings compared to state-of-the-art heuristic approaches. + <>Cloud object stores offer vastly different price points for object storage as a function of workload and geography. Poor object placement can thus lead to significant cost overheads. Prior cost-saving techniques attempt to optimize placement policies on the fly, deciding object placements for each object individually. In practice, these techniques do not scale to the size of the modern cloud. In this work, we leverage the static nature and pay-per-use pricing model of cloud environments to explore a different approach. Rather than computing object placements on the fly, we precompute a SkyPIE oracle---a lookup structure representing all possible placement policies and the workloads for which they are optimal., + <>Internally, SkyPIE represents placement policies as a matrix of cost-hyperplanes, which we effectively precompute through pruning and convex optimization. By leveraging a fast geometric algorithm, online queries then are 1 to 8 orders of magnitude faster but as accurate as Integer-Linear-Programming. This makes exact optimization tractable for real workloads and we show {">"}10x cost savings compared to state-of-the-art heuristic approaches. ], conf: "SIGMOD 2024", links: <>PDF / GitHub }, + { + title: "Suki: Choreographed Distributed Dataflow in Rust", + pdf: "pathname:///papers/suki.pdf", + thumb: require("./img/papers/suki.png"), + authors: <>Shadaj Laddad, Alvin Cheung, Joseph M. Hellerstein, + description: [ + <>Programming models for distributed dataflow have long focused on analytical workloads that allow the runtime to dynamically place and schedule compute logic. Meanwhile, models that enable fine-grained control over placement, such as actors, make global optimization difficult. In this extended abstract, we present Suki, an embedded Rust DSL that lets developers implement streaming dataflow with explicit placement of computation., + <>Key to this choreographic programming approach is our use of staged programming, which lets us expose a high-level Rust API while compiling local compute units into individual binaries with zero-overhead. + ], + conf: "CP 2024", + links: <>PDF / arXiv + }, { title: "Bigger, not Badder: Safely Scaling BFT Protocols", pdf: "pathname:///papers/david-papoc-2024.pdf", @@ -191,14 +217,20 @@ export default function Home() { {paper.conf} -

+

{paper["title"]}

{paper["authors"]}

-

{paper.description[0]} {paper.description[1]}

+

{paper.description[0]} {paper.description[1]}

{paper.links}

diff --git a/docs/static/papers/flo.pdf b/docs/static/papers/flo.pdf new file mode 100644 index 000000000000..146dab24a520 Binary files /dev/null and b/docs/static/papers/flo.pdf differ diff --git a/docs/static/papers/suki.pdf b/docs/static/papers/suki.pdf new file mode 100644 index 000000000000..e45a0aae7981 Binary files /dev/null and b/docs/static/papers/suki.pdf differ