Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test(chunk-upload): Add a test for uploading multiple debug files #2274

Merged
merged 1 commit into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
This directory contains a set of three macOS Rust executables, compiled as a debug build, and thus containing debug information.
We use these executables to test the chunk upload functionality of the Sentry CLI.
Binary file not shown.
Binary file not shown.
Binary file not shown.
97 changes: 96 additions & 1 deletion tests/integration/debug_files/upload.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{fs, str};

use regex::bytes::Regex;

use crate::integration::{AssertCommand, MockEndpointBuilder, TestManager};
use crate::integration::{chunk_upload, AssertCommand, MockEndpointBuilder, TestManager};

/// This regex is used to extract the boundary from the content-type header.
/// We need to match the boundary, since it changes with each request.
Expand Down Expand Up @@ -285,3 +285,98 @@ fn ensure_correct_chunk_upload() {
.with_default_token()
.run_and_assert(AssertCommand::Success);
}

/// Checks that `debug-files upload` chunk-uploads a directory holding several debug files.
///
/// The mocked chunk-upload `POST` endpoint compares the set of uploaded chunks against a
/// previously recorded request body. The mocked assemble endpoint (registered with
/// `.expect(2)`) answers `not_found` for every file on its first call and `created` on any
/// later call.
#[test]
fn chunk_upload_multiple_files() {
    /// Recorded multipart body that the upload is compared against.
    const EXPECTED_BODY_PATH: &str =
        "tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin";
    /// This is the boundary used in the expected request file.
    /// It was randomly generated when the expected request was recorded.
    const RECORDED_BOUNDARY: &str = "------------------------b26LKrHFvpOPfwMoDhYNY8";
    /// Directory passed to `debug-files upload`.
    const FIXTURE_DIR: &str =
        "tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files";
    /// Assemble reply used for the first call: every file still has a missing chunk.
    const ASSEMBLE_NOT_FOUND: &str = r#"{
"6e217f035ed538d4d6c14129baad5cb52e680e74": {
"state": "not_found",
"missingChunks": ["6e217f035ed538d4d6c14129baad5cb52e680e74"]
},
"500848b7815119669a292f2ae1f44af11d7aa2d3": {
"state": "not_found",
"missingChunks": ["500848b7815119669a292f2ae1f44af11d7aa2d3"]
},
"fc27d95861d56fe16a2b66150e31652b76e8c678": {
"state": "not_found",
"missingChunks": ["fc27d95861d56fe16a2b66150e31652b76e8c678"]
}
}"#;
    /// Assemble reply used for every later call: all files are assembled.
    const ASSEMBLE_CREATED: &str = r#"{
"6e217f035ed538d4d6c14129baad5cb52e680e74": {
"state": "created",
"missingChunks": []
},
"500848b7815119669a292f2ae1f44af11d7aa2d3": {
"state": "created",
"missingChunks": []
},
"fc27d95861d56fe16a2b66150e31652b76e8c678": {
"state": "created",
"missingChunks": []
}
}"#;

    let recorded_body =
        fs::read(EXPECTED_BODY_PATH).expect("expected chunk body file should be present");

    // Flipped to `false` by the first assemble request.
    let first_assemble_pending = AtomicBool::new(true);

    TestManager::new()
        .mock_endpoint(
            MockEndpointBuilder::new("GET", "/api/0/organizations/wat-org/chunk-upload/")
                .with_response_file("debug_files/get-chunk-upload.json"),
        )
        .mock_endpoint(
            MockEndpointBuilder::new("POST", "/api/0/organizations/wat-org/chunk-upload/")
                .with_response_fn(move |request| {
                    let request_boundary = chunk_upload::boundary_from_request(request)
                        .expect("content-type header should be a valid multipart/form-data header");

                    let request_body = request.body().expect("body should be readable");

                    let uploaded = chunk_upload::split_chunk_body(request_body, request_boundary)
                        .expect("body should be a valid multipart/form-data body");

                    let recorded =
                        chunk_upload::split_chunk_body(&recorded_body, RECORDED_BOUNDARY)
                            .expect("expected chunk body is a valid multipart/form-data body");

                    // Using assert! because in case of failure, the output with assert_eq!
                    // is too long to be useful.
                    assert!(
                        uploaded == recorded,
                        "Uploaded chunks differ from the expected chunks"
                    );

                    // The reply body is left empty.
                    vec![]
                }),
        )
        .mock_endpoint(
            MockEndpointBuilder::new(
                "POST",
                "/api/0/projects/wat-org/wat-project/files/difs/assemble/",
            )
            .with_header_matcher("content-type", "application/json")
            .with_response_fn(move |_| {
                let reply = if first_assemble_pending.swap(false, Ordering::Relaxed) {
                    ASSEMBLE_NOT_FOUND
                } else {
                    ASSEMBLE_CREATED
                };
                reply.into()
            })
            .expect(2),
        )
        .assert_cmd(vec!["debug-files", "upload", FIXTURE_DIR])
        .with_default_token()
        .run_and_assert(AssertCommand::Success);
}
2 changes: 1 addition & 1 deletion tests/integration/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use std::io;
use std::path::Path;

use test_utils::MockEndpointBuilder;
use test_utils::{env, AssertCommand, ChunkOptions, ServerBehavior, TestManager};
use test_utils::{chunk_upload, env, AssertCommand, ChunkOptions, ServerBehavior, TestManager};

pub const UTC_DATE_FORMAT: &str = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{6,9}Z";
const VERSION: &str = env!("CARGO_PKG_VERSION");
Expand Down
107 changes: 107 additions & 0 deletions tests/integration/test_utils/chunk_upload.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
//! Utilities for chunk upload tests.
use std::collections::HashSet;
use std::error::Error;
use std::str;
use std::sync::LazyLock;

use mockito::Request;
use regex::bytes::Regex;

/// This regex is used to extract the boundary from the content-type header.
/// We need to match the boundary, since it changes with each request.
/// The regex matches the format as specified in
/// <https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html>: a boundary is 1 to 70
/// characters long and must not end with a space.
///
/// The character class lists the ASCII alphanumerics and `_` explicitly instead of using
/// `\w`. Unicode mode is enabled by default in the `regex` crate, so `\w` would also accept
/// non-ASCII word characters, which the specification does not permit in a boundary.
static CONTENT_TYPE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"^multipart\/form-data; boundary=(?<boundary>[0-9A-Za-z_'\(\)+,\-\.\/:=? ]{0,69}[0-9A-Za-z_'\(\)+,\-\.\/:=?])$"#
    )
    .expect("Regex is valid")
});

/// Read-only access to the headers of a mock request.
///
/// Going through this trait keeps the helpers in this module independent of the concrete
/// mock library, which allows future compatibility in case we switch to a different one.
pub trait HeaderContainer {
    /// Returns the raw bytes of every value found for the header named `header_name`.
    fn header(&self, header_name: &str) -> Vec<&[u8]>;
}

impl HeaderContainer for Request {
    fn header(&self, header_name: &str) -> Vec<&[u8]> {
        // Method resolution prefers mockito's inherent `Request::header` over this trait
        // method, so this call delegates to mockito rather than recursing.
        let values = self.header(header_name);
        values.into_iter().map(|value| value.as_bytes()).collect()
    }
}

/// Splits a multipart/form-data `body` into the sections delimited by `boundary`.
///
/// The sections are returned as a set, because the chunk uploading code does not
/// guarantee any specific order of the chunks in the body. Callers only want to check
/// that each expected chunk is present, not where it appears.
///
/// # Errors
///
/// Returns an error if `body` does not have the overall shape of a multipart form
/// delimited by `boundary`.
pub fn split_chunk_body<'b>(
    body: &'b [u8],
    boundary: &str,
) -> Result<HashSet<&'b [u8]>, Box<dyn Error>> {
    // The boundary is arbitrary text, so escape it before embedding it in a regex.
    let boundary_pattern = regex::escape(boundary);

    let whole_body = entire_body_regex(&boundary_pattern);
    let Some(captures) = whole_body.captures(body) else {
        return Err("body does not match multipart form regex".into());
    };

    let inner_body = captures
        .name("body")
        .expect("the regex has a \"body\" capture group which should always match")
        .as_bytes();

    // Using HashSet does have the small disadvantage that we don't
    // preserve the count of any duplicate chunks, so our tests will
    // fail to detect when the same chunk is included multiple times
    // (this would be a bug). But, this way, we don't need to keep
    // track of counts of chunks.
    let separator = boundary_regex(&boundary_pattern);
    let sections: HashSet<&'b [u8]> = separator.split(inner_body).collect();

    Ok(sections)
}

/// Extracts the boundary from the content-type header of a multipart/form-data request.
///
/// # Errors
///
/// Returns an error if the content-type header is not present exactly once, if its
/// value does not match the multipart/form-data regex, or if the boundary is not
/// valid UTF-8.
pub fn boundary_from_request(request: &impl HeaderContainer) -> Result<&str, Box<dyn Error>> {
    let headers = request.header("content-type");

    // The slice pattern only matches when there is exactly one header value.
    let &[content_type] = headers.as_slice() else {
        return Err(format!(
            "content-type header should be present exactly once, found {} times",
            headers.len()
        )
        .into());
    };

    let Some(captures) = CONTENT_TYPE_REGEX.captures(content_type) else {
        return Err("content-type does not match multipart/form-data regex".into());
    };

    let raw_boundary = captures
        .name("boundary")
        .expect("if the regex matches, the boundary should match as well.")
        .as_bytes();

    let boundary = str::from_utf8(raw_boundary)?;
    Ok(boundary)
}

/// Builds a regex which should match the entire body of a multipart form whose
/// boundary, already regex-escaped, is `regex_escaped_boundary`.
///
/// The regex has a named capture group, "body", holding everything from the first
/// starting boundary to the final ending boundary (non-inclusive of the boundaries).
///
/// # Panics
///
/// May panic if the boundary is not regex-escaped.
fn entire_body_regex(regex_escaped_boundary: &str) -> Regex {
    // `(?s-u:.*?)`: `s` lets `.` cross line breaks and `-u` lets it match arbitrary
    // bytes; the match is lazy.
    let pattern = format!(
        r#"^--{regex_escaped_boundary}(?<body>(?s-u:.*?))--{regex_escaped_boundary}--\s*$"#
    );

    Regex::new(&pattern).expect("This regex should be valid")
}

/// Builds a regex which matches the start of a section of a multipart form, i.e. the
/// regex-escaped boundary `regex_escaped_boundary` preceded by `--`.
///
/// # Panics
///
/// Panics if the resulting pattern is not a valid regex.
fn boundary_regex(regex_escaped_boundary: &str) -> Regex {
    let pattern = format!(r#"--{regex_escaped_boundary}"#);

    Regex::new(&pattern).expect("This regex should be valid")
}
1 change: 1 addition & 0 deletions tests/integration/test_utils/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! A collection of utilities for integration tests.

pub mod chunk_upload;
pub mod env;

mod mock_common_endpoints;
Expand Down
Loading