From 50ab9ced5a5f78e6675766f5477469f44a800572 Mon Sep 17 00:00:00 2001 From: deedy5 <65482418+deedy5@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:25:57 +0300 Subject: [PATCH] move get encoding to response struct, make encoding setter, request future: bypass unnecessary response items --- src/lib.rs | 43 ++++++++++++------------------------------- src/response.rs | 40 ++++++++++++++++++++++++++++------------ 2 files changed, 40 insertions(+), 43 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0ed191a..fff185b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,7 +6,7 @@ use ahash::RandomState; use anyhow::{anyhow, Result}; use indexmap::IndexMap; use pyo3::prelude::*; -use pyo3::types::{PyBytes, PyDict, PyString}; +use pyo3::types::{PyBytes, PyDict}; use rquest::header::{HeaderMap, HeaderName, HeaderValue, COOKIE}; use rquest::tls::Impersonate; use rquest::multipart; @@ -18,7 +18,7 @@ mod response; use response::Response; mod utils; -use utils::{get_encoding_from_content, get_encoding_from_headers, json_dumps, url_encode}; +use utils::{json_dumps, url_encode}; // Tokio global one-thread runtime fn runtime() -> &'static Runtime { @@ -359,42 +359,23 @@ impl Client { let status_code = resp.status().as_u16(); let url = resp.url().to_string(); let buf = resp.bytes().await?; - let encoding = get_encoding_from_headers(&headers) - .or_else(|| get_encoding_from_content(&buf)) - .unwrap_or_else(|| "UTF-8".to_string()); - Ok((buf, cookies, encoding, headers, status_code, url)) + + log::info!("response: {} {} {}", url, status_code, buf.len()); + Ok((buf, cookies, headers, status_code, url)) }; // Execute an async future, releasing the Python GIL for concurrency. // Use Tokio global runtime to block on the future. let result = py.allow_threads(|| runtime().block_on(future)); - let (f_buf, f_cookies, f_encoding, f_headers, f_status_code, f_url) = result?; - - // Response items - let cookies_dict = PyDict::new_bound(py); - for (key, value) in f_cookies { - cookies_dict.set_item(key, value)?; - } - let cookies = cookies_dict.unbind(); - let encoding = PyString::new_bound(py, f_encoding.as_str()).unbind(); - let headers_dict = PyDict::new_bound(py); - for (key, value) in f_headers { - headers_dict.set_item(key, value)?; - } - let headers = headers_dict.unbind(); - let status_code = f_status_code.into_py(py); - let url = PyString::new_bound(py, &f_url).unbind(); - let content = PyBytes::new_bound(py, &f_buf).unbind(); - - log::info!("response: {} {} {} {}", f_url, f_status_code, f_buf.len(), f_encoding); + let (f_buf, f_cookies, f_headers, f_status_code, f_url) = result?; Ok(Response { - content, - cookies, - encoding, - headers, - status_code, - url, + content: PyBytes::new_bound(py, &f_buf).unbind(), + cookies: f_cookies, + encoding: String::new(), + headers: f_headers, + status_code: f_status_code, + url: f_url, }) } diff --git a/src/response.rs b/src/response.rs index ae8f267..56260b1 100644 --- a/src/response.rs +++ b/src/response.rs @@ -1,8 +1,10 @@ +use crate::utils::{get_encoding_from_content, get_encoding_from_headers}; +use ahash::RandomState; use anyhow::{anyhow, Result}; use encoding_rs::Encoding; use html2text::{from_read, from_read_with_decorator, render::text_renderer::TrivialDecorator}; -use pyo3::prelude::*; -use pyo3::types::{PyBytes, PyDict, PyString}; +use indexmap::IndexMap; +use pyo3::{prelude::*, types::PyBytes}; /// A struct representing an HTTP response. /// @@ -13,29 +15,43 @@ pub struct Response { #[pyo3(get)] pub content: Py, #[pyo3(get)] - pub cookies: Py, + pub cookies: IndexMap, + #[pyo3(get, set)] + pub encoding: String, #[pyo3(get)] - pub encoding: Py, + pub headers: IndexMap, #[pyo3(get)] - pub headers: Py, + pub status_code: u16, #[pyo3(get)] - pub status_code: Py, - #[pyo3(get)] - pub url: Py, + pub url: String, } #[pymethods] impl Response { + #[getter] + fn get_encoding(&mut self, py: Python) -> Result<&String> { + if !self.encoding.is_empty() { + return Ok(&self.encoding); + } + self.encoding = get_encoding_from_headers(&self.headers) + .or(get_encoding_from_content(&self.content.bind(py).as_bytes())) + .unwrap_or("UTF-8".to_string()); + Ok(&self.encoding) + } + #[getter] fn text(&mut self, py: Python) -> Result { - let encoding_name = &self.encoding.bind(py).to_string(); + // If self.encoding is empty, call get_encoding to populate self.encoding + if self.encoding.is_empty() { + self.get_encoding(py)?; + } // Convert Py to &[u8] let raw_bytes = &self.content.bind(py).as_bytes(); // Release the GIL here because decoding can be CPU-intensive let (decoded_str, detected_encoding_name) = py.allow_threads(|| { - let encoding_name_bytes = &encoding_name.as_bytes().to_vec(); + let encoding_name_bytes = &self.encoding.as_bytes(); let encoding = Encoding::for_label(encoding_name_bytes).ok_or({ anyhow!( "Unsupported charset: {}", @@ -51,8 +67,8 @@ impl Response { })?; // Update self.encoding based on the detected encoding - if encoding_name != &detected_encoding_name { - self.encoding = PyString::new_bound(py, &detected_encoding_name).into(); + if &self.encoding != &detected_encoding_name { + self.encoding = detected_encoding_name; } Ok(decoded_str)