From 2a9a23e2e433dcd82ac1bfdce067894a80abeb96 Mon Sep 17 00:00:00 2001 From: zuisong Date: Mon, 14 Apr 2025 10:53:37 +0800 Subject: [PATCH] apply suggestion --- src/content_disposition.rs | 73 +++++++++++++++++++++++++++++--------- src/download.rs | 8 ++++- tests/cases/download.rs | 28 +++++++++++++-- 3 files changed, 89 insertions(+), 20 deletions(-) diff --git a/src/content_disposition.rs b/src/content_disposition.rs index 7cb0a5c..c598dc5 100644 --- a/src/content_disposition.rs +++ b/src/content_disposition.rs @@ -1,4 +1,4 @@ -use percent_encoding::percent_decode; +use percent_encoding::percent_decode_str; /// Parse filename from Content-Disposition header /// Prioritizes filename* parameter if present, otherwise uses filename parameter @@ -10,8 +10,8 @@ pub fn parse_filename_from_content_disposition(content_disposition: &str) -> Opt // First try to find filename* parameter for part in parts.iter() { - if part.starts_with("filename*=") { - if let Some(filename) = parse_encoded_filename(part) { + if let Some(value) = part.strip_prefix("filename*=") { + if let Some(filename) = parse_encoded_filename(value) { return Some(filename); } } @@ -19,8 +19,8 @@ pub fn parse_filename_from_content_disposition(content_disposition: &str) -> Opt // If filename* is not found or parsing failed, try regular filename parameter for part in parts { - if part.starts_with("filename=") { - return parse_regular_filename(part); + if let Some(value) = part.strip_prefix("filename=") { + return parse_regular_filename(value); } } @@ -29,10 +29,7 @@ pub fn parse_filename_from_content_disposition(content_disposition: &str) -> Opt /// Parse regular filename parameter /// Handles both quoted and unquoted filenames -fn parse_regular_filename(part: &str) -> Option { - let filename = part.trim_start_matches("filename="); - // Remove quotes if present - // +fn parse_regular_filename(filename: &str) -> Option { // Content-Disposition: attachment; filename="file with \"quotes\".txt" // This won't occur // Content-Disposition: attachment; filename*=UTF-8''file%20with%20quotes.txt // This is the actual practice // @@ -41,6 +38,8 @@ fn parse_regular_filename(part: &str) -> Option { // It's not a standard practice // It rarely occurs in real-world scenarios // When filenames contain special characters, they should use the filename* parameter + + // Remove quotes if present let filename = if filename.starts_with('"') && filename.ends_with('"') && filename.len() >= 2 { &filename[1..(filename.len() - 1)] } else { @@ -56,24 +55,36 @@ fn parse_regular_filename(part: &str) -> Option { /// Parse RFC 5987 encoded filename (filename*) /// Format: charset'language'encoded-value -fn parse_encoded_filename(part: &str) -> Option { +fn parse_encoded_filename(content: &str) -> Option { // Remove "filename*=" prefix - let content = part.trim_start_matches("filename*="); // According to RFC 5987, format should be: charset'language'encoded-value let parts: Vec<&str> = content.splitn(3, '\'').collect(); if parts.len() != 3 { return None; } - + let charset = parts[0]; let encoded_filename = parts[2]; - // Decode using percent-encoding - let decoded = percent_decode(encoded_filename.as_bytes()) - .decode_utf8() - .ok()?; + // Percent-decode the encoded filename into bytes. + let decoded_bytes = percent_decode_str(encoded_filename).collect::>(); - Some(decoded.into_owned()) + if charset.eq_ignore_ascii_case("UTF-8") { + if let Ok(decoded_str) = String::from_utf8(decoded_bytes) { + return Some(decoded_str); + } + } else if charset.eq_ignore_ascii_case("ISO-8859-1") { + // Use the encoding_rs crate to decode ISO-8859-1 bytes. + let decoded: String = decoded_bytes.iter().map(|&b| b as char).collect(); + return Some(decoded); + } else { + // Unknown charset. As a fallback, try interpreting as UTF-8. + if let Ok(decoded_str) = String::from_utf8(decoded_bytes) { + return Some(decoded_str); + } + } + + None } #[cfg(test)] @@ -119,9 +130,37 @@ mod tests { ); } + #[test] + fn test_both_filenames_with_bad_format() { + // When both filename and filename* are present, filename* with bad format, filename should be used + let header = r#"attachment; filename="fallback.pdf"; filename*=UTF-8'bad_format.pdf"#; + assert_eq!( + parse_filename_from_content_disposition(header), + Some("fallback.pdf".to_string()) + ); + } + #[test] fn test_no_filename() { let header = "attachment"; assert_eq!(parse_filename_from_content_disposition(header), None); } + + #[test] + fn test_iso_8859_1() { + let header = "attachment;filename*=iso-8859-1'en'%A3%20rates"; + assert_eq!( + parse_filename_from_content_disposition(header), + Some("£ rates".to_string()) + ); + } + + #[test] + fn test_bad_encoding_fallback_to_utf8() { + let header = "attachment;filename*=UTF-16''%E6%B5%8B%E8%AF%95.pdf"; + assert_eq!( + parse_filename_from_content_disposition(header), + Some("测试.pdf".to_string()) + ); + } } diff --git a/src/download.rs b/src/download.rs index 4d2ee49..16a7d39 100644 --- a/src/download.rs +++ b/src/download.rs @@ -52,7 +52,13 @@ fn get_file_name(response: &Response, orig_url: &reqwest::Url) -> String { .or_else(|| from_url(orig_url)) .unwrap_or_else(|| "index".to_string()); - let filename = sanitize_filename::sanitize(&filename); + let filename = sanitize_filename::sanitize_with_options( + &filename, + sanitize_filename::Options { + replacement: "_", + ..Default::default() + }, + ); let mut filename = filename.trim().trim_start_matches('.').to_string(); diff --git a/tests/cases/download.rs b/tests/cases/download.rs index 5ad4b17..41c4ec9 100644 --- a/tests/cases/download.rs +++ b/tests/cases/download.rs @@ -166,6 +166,30 @@ fn download_support_filename_rfc_5987_percent_encoded() { ); } +#[test] +fn download_support_filename_rfc_5987_percent_encoded_with_iso_8859_1() { + let dir = tempdir().unwrap(); + let server = server::http(|_req| async move { + hyper::Response::builder() + .header( + "Content-Disposition", + r#"attachment; filename*=iso-8859-1'en'%A3%20rates.txt"#, + ) + .body("file".into()) + .unwrap() + }); + + get_command() + .args(["--download", &server.base_url()]) + .current_dir(&dir) + .assert() + .success(); + assert_eq!( + fs::read_to_string(dir.path().join("£ rates.txt")).unwrap(), + "file" + ); +} + #[test] fn download_filename_star_with_high_priority() { let dir = tempdir().unwrap(); @@ -230,7 +254,7 @@ fn download_filename_with_directory_traversal() { .assert() .success(); assert_eq!( - fs::read_to_string(dir.path().join("foobazbar")).unwrap(), + fs::read_to_string(dir.path().join("foo_baz_bar")).unwrap(), "file" ); } @@ -255,7 +279,7 @@ fn download_filename_with_windows_directory_traversal() { .assert() .success(); assert_eq!( - fs::read_to_string(dir.path().join("foobazbar")).unwrap(), + fs::read_to_string(dir.path().join("foo_baz_bar")).unwrap(), "file" ); }