getsentry · szokeasaurusrex · Nov 27, 2024 · Nov 27, 2024
diff --git a/tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin b/tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin
diff --git a/.../integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/README.md b/.../integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/README.md
@@ -0,0 +1,2 @@
+This directory contains a set of three macOS Rust executables, compiled as a debug build, and thus containing debug information.
+We use these executables to test the chunk upload functionality of the Sentry CLI.
diff --git a/tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/fibonacci b/tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/fibonacci
diff --git a/tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/fibonacci-fast b/tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/fibonacci-fast
diff --git a/tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/main b/tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files/main
diff --git a/tests/integration/debug_files/upload.rs b/tests/integration/debug_files/upload.rs
@@ -4,7 +4,7 @@ use std::{fs, str};
 
 use regex::bytes::Regex;
 
-use crate::integration::{AssertCommand, MockEndpointBuilder, TestManager};
+use crate::integration::{chunk_upload, AssertCommand, MockEndpointBuilder, TestManager};
 
 /// This regex is used to extract the boundary from the content-type header.
 /// We need to match the boundary, since it changes with each request.
@@ -285,3 +285,98 @@ fn ensure_correct_chunk_upload() {
         .with_default_token()
         .run_and_assert(AssertCommand::Success);
 }
+
+#[test]
+/// This test verifies a correct chunk upload of multiple debug files.
+///
+/// The multipart body sent to the chunk-upload endpoint is compared against a
+/// recorded request as an unordered set of chunks. The mocked assemble endpoint
+/// reports every file as `not_found` (with its chunk missing) on the first call
+/// and as `created` on any later call.
+fn chunk_upload_multiple_files() {
+    let expected_chunk_body = fs::read(
+        "tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin",
+    )
+    .expect("expected chunk body file should be present");
+    // This is the boundary used in the expected request file.
+    // It was randomly generated when the expected request was recorded.
+    let boundary_of_expected_request = "------------------------b26LKrHFvpOPfwMoDhYNY8";
+
+    // Lets the assemble mock below answer its first call differently from later calls.
+    let is_first_assemble_call = AtomicBool::new(true);
+    TestManager::new()
+        .mock_endpoint(
+            MockEndpointBuilder::new("GET", "/api/0/organizations/wat-org/chunk-upload/")
+                .with_response_file("debug_files/get-chunk-upload.json"),
+        )
+        .mock_endpoint(
+            MockEndpointBuilder::new("POST", "/api/0/organizations/wat-org/chunk-upload/")
+                .with_response_fn(move |request| {
+                    // The boundary of the live request is read from its content-type header,
+                    // since it is not the same as the boundary of the recorded request.
+                    let boundary = chunk_upload::boundary_from_request(request)
+                        .expect("content-type header should be a valid multipart/form-data header");
+
+                    let body = request.body().expect("body should be readable");
+
+                    let chunks = chunk_upload::split_chunk_body(body, boundary)
+                        .expect("body should be a valid multipart/form-data body");
+
+                    // The recorded body is split using the boundary it was recorded with.
+                    let expected_chunks = chunk_upload::split_chunk_body(
+                        &expected_chunk_body,
+                        boundary_of_expected_request,
+                    )
+                    .expect("expected chunk body is a valid multipart/form-data body");
+
+                    // Using assert! because in case of failure, the output with assert_eq!
+                    // is too long to be useful.
+                    assert!(
+                        chunks == expected_chunks,
+                        "Uploaded chunks differ from the expected chunks"
+                    );
+
+                    // Respond with an empty body.
+                    vec![]
+                }),
+        )
+        .mock_endpoint(
+            MockEndpointBuilder::new(
+                "POST",
+                "/api/0/projects/wat-org/wat-project/files/difs/assemble/",
+            )
+            .with_header_matcher("content-type", "application/json")
+            .with_response_fn(move |_| {
+                // `swap` stores `false` and returns the previous value, so the first
+                // branch is taken only for the very first call.
+                if is_first_assemble_call.swap(false, Ordering::Relaxed) {
+                    r#"{
+                        "6e217f035ed538d4d6c14129baad5cb52e680e74": {
+                            "state": "not_found",
+                            "missingChunks": ["6e217f035ed538d4d6c14129baad5cb52e680e74"]
+                        },
+                        "500848b7815119669a292f2ae1f44af11d7aa2d3": {
+                            "state": "not_found",
+                            "missingChunks": ["500848b7815119669a292f2ae1f44af11d7aa2d3"]
+                        },
+                        "fc27d95861d56fe16a2b66150e31652b76e8c678": {
+                            "state": "not_found",
+                            "missingChunks": ["fc27d95861d56fe16a2b66150e31652b76e8c678"]
+                        }
+                    }"#
+                } else {
+                    r#"{
+                        "6e217f035ed538d4d6c14129baad5cb52e680e74": {
+                            "state": "created",
+                            "missingChunks": []
+                        },
+                        "500848b7815119669a292f2ae1f44af11d7aa2d3": {
+                            "state": "created",
+                            "missingChunks": []
+                        },
+                        "fc27d95861d56fe16a2b66150e31652b76e8c678": {
+                            "state": "created",
+                            "missingChunks": []
+                        }
+                    }"#
+                }
+                .into()
+            })
+            // NOTE(review): presumably asserts that this endpoint is hit exactly twice,
+            // once per branch above; confirm against `MockEndpointBuilder::expect`.
+            .expect(2),
+        )
+        .assert_cmd(vec![
+            "debug-files",
+            "upload",
+            "tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files",
+        ])
+        .with_default_token()
+        .run_and_assert(AssertCommand::Success);
+}
diff --git a/tests/integration/mod.rs b/tests/integration/mod.rs
@@ -31,7 +31,7 @@ use std::io;
 use std::path::Path;
 
 use test_utils::MockEndpointBuilder;
-use test_utils::{env, AssertCommand, ChunkOptions, ServerBehavior, TestManager};
+use test_utils::{chunk_upload, env, AssertCommand, ChunkOptions, ServerBehavior, TestManager};
 
 pub const UTC_DATE_FORMAT: &str = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{6,9}Z";
 const VERSION: &str = env!("CARGO_PKG_VERSION");

diff --git a/tests/integration/test_utils/chunk_upload.rs b/tests/integration/test_utils/chunk_upload.rs
@@ -0,0 +1,107 @@
+//! Utilities for chunk upload tests.
+use std::collections::HashSet;
+use std::error::Error;
+use std::str;
+use std::sync::LazyLock;
+
+use mockito::Request;
+use regex::bytes::Regex;
+
+/// Matches a `multipart/form-data` content-type header value and captures its
+/// boundary parameter in the named group `boundary`.
+/// The boundary has to be extracted per request, because it changes with each
+/// request.
+/// The accepted boundary format is the one specified in
+/// https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html.
+static CONTENT_TYPE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
+    let pattern =
+        r#"^multipart\/form-data; boundary=(?<boundary>[\w'\(\)+,\-\.\/:=? ]{0,69}[\w'\(\)+,\-\.\/:=?])$"#;
+
+    Regex::new(pattern).expect("Regex is valid")
+});
+
+/// A trait which abstracts over accessing headers from a mock request.
+/// Allows future compatibility in case we switch to a different mock library.
+pub trait HeaderContainer {
+    /// Returns the raw bytes of the values found for the header named `header_name`.
+    /// A header may be present any number of times, hence the `Vec`.
+    fn header(&self, header_name: &str) -> Vec<&[u8]>;
+}
+
+impl HeaderContainer for Request {
+    /// Collects the raw bytes of each value the mock request holds for `header_name`.
+    fn header(&self, header_name: &str) -> Vec<&[u8]> {
+        // Inherent methods take precedence over trait methods, so this calls
+        // `Request`'s own `header` method rather than recursing into this one.
+        let header_values = self.header(header_name);
+
+        header_values
+            .into_iter()
+            .map(|value| value.as_bytes())
+            .collect()
+    }
+}
+
+/// Breaks a multipart/form-data `body` delimited by `boundary` into its chunks.
+///
+/// The result is a set rather than a list: the chunk uploading code makes no
+/// promise about the order in which chunks appear in the body, and the tests
+/// only care that every expected chunk is present.
+///
+/// Returns an error if `body` does not have the overall shape of a multipart
+/// form using `boundary`.
+pub fn split_chunk_body<'b>(
+    body: &'b [u8],
+    boundary: &str,
+) -> Result<HashSet<&'b [u8]>, Box<dyn Error>> {
+    let boundary_pattern = regex::escape(boundary);
+
+    // First strip the opening and the closing boundary from the body.
+    let whole_body = entire_body_regex(&boundary_pattern);
+    let Some(captures) = whole_body.captures(body) else {
+        return Err("body does not match multipart form regex".into());
+    };
+    let sections = captures
+        .name("body")
+        .expect("the regex has a \"body\" capture group which should always match")
+        .as_bytes();
+
+    // Collecting into a set drops duplicates, so a chunk that is (wrongly)
+    // included more than once goes unnoticed by the tests. That small blind
+    // spot is accepted in exchange for not having to count chunks.
+    let separator = boundary_regex(&boundary_pattern);
+    let chunks: HashSet<&'b [u8]> = separator.split(sections).collect();
+
+    Ok(chunks)
+}
+
+/// Reads the multipart boundary out of a request's content-type header.
+///
+/// Fails when the request does not carry exactly one content-type header, when
+/// that header does not match the multipart/form-data regex, or when the
+/// captured boundary is not valid UTF-8.
+pub fn boundary_from_request(request: &impl HeaderContainer) -> Result<&str, Box<dyn Error>> {
+    let headers = request.header("content-type");
+
+    // Exactly one header value is acceptable; anything else is reported with its count.
+    let content_type = match headers.as_slice() {
+        [only] => *only,
+        other => {
+            return Err(format!(
+                "content-type header should be present exactly once, found {} times",
+                other.len()
+            )
+            .into());
+        }
+    };
+
+    let captures = CONTENT_TYPE_REGEX
+        .captures(content_type)
+        .ok_or("content-type does not match multipart/form-data regex")?;
+    let raw_boundary = captures
+        .name("boundary")
+        .expect("if the regex matches, the boundary should match as well.")
+        .as_bytes();
+
+    Ok(str::from_utf8(raw_boundary)?)
+}
+
+/// Builds a regex meant to match a complete multipart form body whose boundary
+/// (already regex-escaped) is `regex_escaped_boundary`.
+///
+/// The named capture group "body" holds everything between the first opening
+/// boundary and the final closing boundary, excluding both boundaries.
+/// May panic if the boundary is not regex-escaped.
+fn entire_body_regex(regex_escaped_boundary: &str) -> Regex {
+    let pattern = format!(
+        r#"^--{regex_escaped_boundary}(?<body>(?s-u:.*?))--{regex_escaped_boundary}--\s*$"#
+    );
+
+    Regex::new(&pattern).expect("This regex should be valid")
+}
+
+/// Builds a regex matching the delimiter that starts a section of a multipart
+/// form, given the form's regex-escaped boundary.
+fn boundary_regex(regex_escaped_boundary: &str) -> Regex {
+    let pattern = format!(r#"--{regex_escaped_boundary}"#);
+
+    Regex::new(&pattern).expect("This regex should be valid")
+}
diff --git a/tests/integration/test_utils/mod.rs b/tests/integration/test_utils/mod.rs
@@ -1,5 +1,6 @@
 //! A collection of utilities for integration tests.
 
+pub mod chunk_upload;
 pub mod env;
 
 mod mock_common_endpoints;