Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Get::html() for all platforms #163

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions examples/set_html.rs → examples/set_get_html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,7 @@ consectetur adipiscing elit."#;

ctx.set_html(html, Some(alt_text)).unwrap();
thread::sleep(Duration::from_secs(5));

let success = ctx.get().html().unwrap() == html;
println!("Set and Get html operations were successful: {success}");
}
14 changes: 14 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,11 @@ impl Get<'_> {
pub fn image(self) -> Result<ImageData<'static>, Error> {
self.platform.image()
}

/// Finishes the "get" operation by reading the clipboard contents as HTML.
///
/// The request is handed straight to the platform-specific backend.
///
/// # Errors
///
/// Returns whatever [`Error`] the platform backend reports.
pub fn html(self) -> Result<String, Error> {
    let backend = self.platform;
    backend.html()
}
}

/// A builder for an operation that sets a value to the clipboard.
Expand Down Expand Up @@ -322,6 +327,15 @@ mod tests {
ctx.set_html(html, Some(alt_text)).unwrap();
assert_eq!(ctx.get_text().unwrap(), alt_text);
}
{
let mut ctx = Clipboard::new().unwrap();

let html = "<b>hello</b> <i>world</i>!";

ctx.set().html(html, None).unwrap();

assert_eq!(ctx.get().html().unwrap(), html);
}
#[cfg(feature = "image-data")]
{
let mut ctx = Clipboard::new().unwrap();
Expand Down
8 changes: 8 additions & 0 deletions src/platform/linux/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,14 @@ impl<'clipboard> Get<'clipboard> {
Clipboard::WlDataControl(clipboard) => clipboard.get_image(self.selection),
}
}

/// Reads HTML for the requested selection from whichever backend this
/// clipboard handle wraps.
pub(crate) fn html(self) -> Result<String, Error> {
    let selection = self.selection;
    match self.clipboard {
        Clipboard::X11(x11) => x11.get_html(selection),
        #[cfg(feature = "wayland-data-control")]
        Clipboard::WlDataControl(wayland) => wayland.get_html(selection),
    }
}
}

/// Linux-specific extensions to the [`Get`](super::Get) builder.
Expand Down
18 changes: 14 additions & 4 deletions src/platform/linux/wayland.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,12 @@ impl Clipboard {
Ok(Self {})
}

pub(crate) fn get_text(&mut self, selection: LinuxClipboardKind) -> Result<String, Error> {
use wl_clipboard_rs::paste::MimeType;

let result = get_contents(selection.try_into()?, Seat::Unspecified, MimeType::Text);
fn string_for_mime(
&mut self,
selection: LinuxClipboardKind,
mime: paste::MimeType,
) -> Result<String, Error> {
let result = get_contents(selection.try_into()?, Seat::Unspecified, mime);
match result {
Ok((mut pipe, _)) => {
let mut contents = vec![];
Expand All @@ -74,6 +76,10 @@ impl Clipboard {
}
}

/// Reads the given selection using the generic text MIME type.
pub(crate) fn get_text(&mut self, selection: LinuxClipboardKind) -> Result<String, Error> {
    let mime = paste::MimeType::Text;
    self.string_for_mime(selection, mime)
}

pub(crate) fn set_text(
&self,
text: Cow<'_, str>,
Expand All @@ -91,6 +97,10 @@ impl Clipboard {
Ok(())
}

/// Reads the given selection, requesting the `text/html` MIME type specifically.
pub(crate) fn get_html(&mut self, selection: LinuxClipboardKind) -> Result<String, Error> {
    let mime = paste::MimeType::Specific("text/html");
    self.string_for_mime(selection, mime)
}

pub(crate) fn set_html(
&self,
html: Cow<'_, str>,
Expand Down
6 changes: 6 additions & 0 deletions src/platform/linux/x11.rs
Original file line number Diff line number Diff line change
Expand Up @@ -885,6 +885,12 @@ impl Clipboard {
self.inner.write(data, selection, wait)
}

pub(crate) fn get_html(&self, selection: LinuxClipboardKind) -> Result<String> {
let formats = [self.inner.atoms.HTML];
let result = self.inner.read(&formats, selection)?;
String::from_utf8(result.bytes).map_err(|_| Error::ConversionFailure)
}

pub(crate) fn set_html(
&self,
html: Cow<'_, str>,
Expand Down
58 changes: 38 additions & 20 deletions src/platform/osx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,27 @@ impl Clipboard {
unsafe { self.pasteboard.clearContents() };
}

fn string_from_type(&self, type_: &'static NSString) -> Result<String, Error> {
// XXX: There does not appear to be an alternative for obtaining text without the need for
// autorelease behavior.
autoreleasepool(|_| {
// XXX: We explicitly use `pasteboardItems` and not `stringForType` since the latter will concat
// multiple strings, if present, into one and return it instead of reading just the first which is `arboard`'s
// historical behavior.
let contents = unsafe { self.pasteboard.pasteboardItems() }.ok_or_else(|| {
Error::Unknown { description: String::from("NSPasteboard#pasteboardItems errored") }
})?;

for item in contents {
if let Some(string) = unsafe { item.stringForType(type_) } {
return Ok(string.to_string());
}
}

Err(Error::ContentNotAvailable)
})
}

// fn get_binary_contents(&mut self) -> Result<Option<ClipboardContent>, Box<dyn std::error::Error>> {
// let string_class: Id<NSObject> = {
// let cls: Id<Class> = unsafe { Id::from_ptr(class("NSString")) };
Expand Down Expand Up @@ -182,27 +203,12 @@ impl<'clipboard> Get<'clipboard> {
}

pub(crate) fn text(self) -> Result<String, Error> {
// XXX: There does not appear to be an alternative for obtaining text without the need for
// autorelease behavior.
autoreleasepool(|_| {
// XXX: We explicitly use `pasteboardItems` and not `stringForType` since the latter will concat
// multiple strings, if present, into one and return it instead of reading just the first which is `arboard`'s
// historical behavior.
let contents =
unsafe { self.clipboard.pasteboard.pasteboardItems() }.ok_or_else(|| {
Error::Unknown {
description: String::from("NSPasteboard#pasteboardItems errored"),
}
})?;

for item in contents {
if let Some(string) = unsafe { item.stringForType(NSPasteboardTypeString) } {
return Ok(string.to_string());
}
}
unsafe { self.clipboard.string_from_type(NSPasteboardTypeString) }
}

Err(Error::ContentNotAvailable)
})
pub(crate) fn html(self) -> Result<String, Error> {
let html = unsafe { self.clipboard.string_from_type(NSPasteboardTypeHTML) }?;
extract_html(html).ok_or(Error::ConversionFailure)
}

#[cfg(feature = "image-data")]
Expand Down Expand Up @@ -347,6 +353,18 @@ fn add_clipboard_exclusions(clipboard: &mut Clipboard, exclude_from_history: boo
}
}

/// Returns the text between the first `<body>` and the first `</body>` found in `html`.
///
/// Yields `None` when either literal tag is missing. The tags are matched exactly,
/// so an opening `<body ...>` tag that carries attributes is not recognised.
// NOTE(review): both searches start at the beginning of `html`, so the slice at the
// end of this function panics if the first `</body>` precedes the end of the first
// `<body>` tag.
fn extract_html(html: String) -> Option<String> {
let start_tag = "<body>";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: Are these guaranteed to appear in enough cases? For example in Firefox I see <body> tags wrapping the content when I copy from example.com. However if I copy the same domain out of Chrome it directly starts with <h1>. If I copy a nested block out of Logseq it omits them and starts directly with <ul> as the top tag too.

As-is, IIUC, this will miss a lot of valid HTML cases from real browsers today because it returns None if the <body> wrapper can't be found.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually made a PR on @Gae24's fork to remove this function. I also think it's not necessary and should be left to the consuming end, if needed.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's definitely a question worth considering either way. I don't have a Linux system available to test this currently but on Windows I am returned a <div> directly when copying from Firefox and the same <h1> as Chrome returns on macOS.

If we wanted to keep the behavior closer between the platforms we could strip out <body> and anything before it on both sides of the HTML.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd guess what's written to the clipboard is entirely application-dependent. Applications other than browsers (Word or similar) probably have a different format as well.

Which leads me to say transforming the clipboard HTML content would have to be left to the consumer.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I remember correctly it was added because some tests were failing. Testing example.com on Linux while Firefox wraps the content with <meta http-equiv="content-type" content="text/html; charset=utf-8"><div></div>, Edge that's still Chromium, simply gives <h1>

let end_tag = "</body>";

// Locate the start index of the <body> tag
let start_index = html.find(start_tag)? + start_tag.len();
// Locate the end index of the </body> tag
let end_index = html.find(end_tag)?;

Some(html[start_index..end_index].to_string())
Comment on lines +361 to +365
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: drop(String::drain(...)) twice may be more efficient for large blocks of HTML since it keeps the string.

}

/// Apple-specific extensions to the [`Set`](crate::Set) builder.
pub trait SetExtApple: private::Sealed {
/// Excludes the data which will be set on the clipboard from being added to
Expand Down
13 changes: 13 additions & 0 deletions src/platform/windows.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,19 @@ impl<'clipboard> Get<'clipboard> {
String::from_utf16(&out[..bytes_read]).map_err(|_| Error::ConversionFailure)
}

pub(crate) fn html(self) -> Result<String, Error> {
let _clipboard_assertion = self.clipboard?;

let format = clipboard_win::register_format("HTML Format")
.ok_or_else(|| Error::unknown("unable to register HTML format"))?;

let mut out: Vec<u8> = Vec::new();
clipboard_win::raw::get_html(format.get(), &mut out)
.map_err(|_| Error::unknown("failed to read clipboard string"))?;

String::from_utf8(out).map_err(|_| Error::ConversionFailure)
}

#[cfg(feature = "image-data")]
pub(crate) fn image(self) -> Result<ImageData<'static>, Error> {
const FORMAT: u32 = clipboard_win::formats::CF_DIBV5;
Expand Down