Add RFC 5987 support when parsing the download filename

support percent-encoded filename

Signed-off-by: zuisong <com.me@foxmail.com>
This commit is contained in:
zuisong 2025-04-10 11:52:25 +08:00
parent be990ac505
commit a60a52453a
No known key found for this signature in database
GPG Key ID: A617F31AFE6F5E8D
6 changed files with 205 additions and 12 deletions

1
Cargo.lock generated
View File

@ -2813,6 +2813,7 @@ dependencies = [
"once_cell",
"os_display",
"pem",
"percent-encoding",
"predicates",
"rand",
"regex-lite",

View File

@ -60,6 +60,7 @@ log = "0.4.21"
rustls = { version = "0.23.25", optional = true, default-features = false, features = ["logging"] }
tracing = { version = "0.1.41", default-features = false, features = ["log"] }
reqwest_cookie_store = { version = "0.8.0", features = ["serde"] }
percent-encoding = "2.3.1"
[dependencies.reqwest]
version = "0.12.3"

127
src/content_disposition.rs Normal file
View File

@ -0,0 +1,127 @@
use percent_encoding::percent_decode;
/// Parse the filename from a `Content-Disposition` header value.
///
/// The extended `filename*` parameter (RFC 5987) is preferred. If it is absent
/// or cannot be decoded, the plain `filename` parameter is used instead.
///
/// Parameter names are matched ASCII case-insensitively and optional
/// whitespace around `=` is tolerated. Semicolons inside a double-quoted
/// value do not split the parameter.
///
/// Returns `None` when no usable filename is present.
pub fn parse_filename_from_content_disposition(content_disposition: &str) -> Option<String> {
    // Split `header` on `;`, ignoring semicolons that sit inside double quotes
    // (e.g. `filename="a;b.txt"`). If the quotes are unbalanced the header is
    // malformed, so fall back to a plain split rather than swallowing
    // everything after the stray quote.
    fn split_params(header: &str) -> Vec<&str> {
        let mut params = Vec::new();
        let mut start = 0;
        let mut in_quotes = false;
        for (idx, ch) in header.char_indices() {
            match ch {
                '"' => in_quotes = !in_quotes,
                ';' if !in_quotes => {
                    params.push(header[start..idx].trim());
                    start = idx + 1;
                }
                _ => {}
            }
        }
        if in_quotes {
            return header.split(';').map(str::trim).collect();
        }
        params.push(header[start..].trim());
        params
    }

    // Return the value of `param` when its name equals `name`, compared
    // ASCII case-insensitively.
    fn value_of<'a>(param: &'a str, name: &str) -> Option<&'a str> {
        let (key, value) = param.split_once('=')?;
        if key.trim_end().eq_ignore_ascii_case(name) {
            Some(value.trim_start())
        } else {
            None
        }
    }

    let params = split_params(content_disposition);

    // The helpers expect the canonical lowercase `name=value` form, so the
    // parameter is rebuilt in that form before being handed over.
    let extended = params
        .iter()
        .copied()
        .filter_map(|param| value_of(param, "filename*"))
        .find_map(|value| parse_encoded_filename(&format!("filename*={}", value)));
    if extended.is_some() {
        return extended;
    }

    // `filename*` is missing or undecodable: use the first plain `filename`.
    params
        .iter()
        .copied()
        .find_map(|param| value_of(param, "filename"))
        .and_then(|value| parse_regular_filename(&format!("filename={}", value)))
}
/// Parse the regular `filename` parameter.
///
/// `part` is a single header parameter such as `filename="a.txt"` or
/// `filename=a.txt`. One pair of surrounding double quotes is removed if
/// present. Returns `None` when the resulting name is empty.
fn parse_regular_filename(part: &str) -> Option<String> {
    // `strip_prefix` removes the prefix exactly once. `trim_start_matches`
    // would also eat a value that itself begins with "filename=".
    let value = part.strip_prefix("filename=").unwrap_or(part);

    // Backslash escapes inside the quoted form are deliberately not processed:
    //
    // Content-Disposition: attachment; filename="file with \"quotes\".txt" // not seen in practice
    // Content-Disposition: attachment; filename*=UTF-8''file%20with%20quotes.txt // what servers actually send
    //
    // Names with special characters are expected to arrive via `filename*`.
    //
    // Quotes are removed with `strip_prefix`/`strip_suffix` rather than by
    // slicing, so a value that is a single `"` cannot produce an inverted
    // range and panic. A value with unbalanced quotes is kept as-is.
    let filename = value
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(value);

    if filename.is_empty() {
        None
    } else {
        Some(filename.to_string())
    }
}
/// Parse an RFC 5987 encoded filename (the `filename*` parameter).
///
/// Expected format: `filename*=charset'language'percent-encoded-value`.
/// The language tag is ignored. A charset of `ISO-8859-1` (compared ASCII
/// case-insensitively) is decoded byte-for-byte into Unicode; any other
/// charset is decoded as UTF-8.
///
/// Returns `None` when the value does not have the three `'`-separated
/// fields, when the bytes are not valid for the charset, or when the decoded
/// name is empty.
fn parse_encoded_filename(part: &str) -> Option<String> {
    // Remove the "filename*=" prefix exactly once.
    let content = part.strip_prefix("filename*=").unwrap_or(part);

    // charset ' language ' value; the value itself may contain further `'`.
    let mut fields = content.splitn(3, '\'');
    let charset = fields.next()?;
    let _language = fields.next()?;
    let encoded_filename = fields.next()?;

    let decoded = percent_decode(encoded_filename.as_bytes());
    let filename: String = if charset.eq_ignore_ascii_case("ISO-8859-1") {
        // Every ISO-8859-1 byte maps to the Unicode code point of the same value.
        decoded.map(char::from).collect()
    } else {
        decoded.decode_utf8().ok()?.into_owned()
    };

    // Match the regular parser: an empty name is treated as absent.
    if filename.is_empty() {
        None
    } else {
        Some(filename)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Quoted `filename` values have their surrounding quotes removed.
    #[test]
    fn test_simple_filename() {
        let header = r#"attachment; filename="example.pdf""#;
        assert_eq!(
            parse_filename_from_content_disposition(header),
            Some("example.pdf".to_string())
        );
    }

    /// Bare token `filename` values are returned unchanged.
    #[test]
    fn test_filename_without_quotes() {
        let header = "attachment; filename=example.pdf";
        assert_eq!(
            parse_filename_from_content_disposition(header),
            Some("example.pdf".to_string())
        );
    }

    /// `filename*` values are percent-decoded.
    #[test]
    fn test_encoded_filename() {
        // UTF-8 encoded Chinese filename "测试.pdf"
        let header = "attachment; filename*=UTF-8''%E6%B5%8B%E8%AF%95.pdf";
        assert_eq!(
            parse_filename_from_content_disposition(header),
            Some("测试.pdf".to_string())
        );
    }

    /// When both `filename` and `filename*` are present, `filename*` wins.
    #[test]
    fn test_both_filenames() {
        let header =
            r#"attachment; filename="fallback.pdf"; filename*=UTF-8''%E6%B5%8B%E8%AF%95.pdf"#;
        assert_eq!(
            parse_filename_from_content_disposition(header),
            Some("测试.pdf".to_string())
        );
    }

    /// A `filename*` whose bytes are not valid UTF-8 falls back to `filename`.
    #[test]
    fn test_invalid_encoded_filename_falls_back() {
        let header = r#"attachment; filename="fallback.pdf"; filename*=UTF-8''%FF%FE.pdf"#;
        assert_eq!(
            parse_filename_from_content_disposition(header),
            Some("fallback.pdf".to_string())
        );
    }

    /// A `filename*` without the charset'language'value structure is rejected.
    #[test]
    fn test_malformed_encoded_filename() {
        let header = "attachment; filename*=example.pdf";
        assert_eq!(parse_filename_from_content_disposition(header), None);
    }

    /// An empty quoted `filename` is treated as absent.
    #[test]
    fn test_empty_quoted_filename() {
        let header = r#"attachment; filename="""#;
        assert_eq!(parse_filename_from_content_disposition(header), None);
    }

    /// A header without any filename parameter yields `None`.
    #[test]
    fn test_no_filename() {
        let header = "attachment";
        assert_eq!(parse_filename_from_content_disposition(header), None);
    }
}

View File

@ -3,6 +3,9 @@ use std::io::{self, ErrorKind, IsTerminal};
use std::path::{Path, PathBuf};
use std::time::Instant;
use crate::content_disposition;
use crate::decoder::{decompress, get_compression_type};
use crate::utils::{copy_largebuf, test_pretend_term, HeaderValueExt};
use anyhow::{anyhow, Context, Result};
use indicatif::{HumanBytes, ProgressBar, ProgressStyle};
use mime2ext::mime2ext;
@ -13,9 +16,6 @@ use reqwest::{
StatusCode,
};
use crate::decoder::{decompress, get_compression_type};
use crate::utils::{copy_largebuf, test_pretend_term, HeaderValueExt};
fn get_content_length(headers: &HeaderMap) -> Option<u64> {
headers
.get(CONTENT_LENGTH)
@ -27,20 +27,12 @@ fn get_content_length(headers: &HeaderMap) -> Option<u64> {
// of PathBufs
fn get_file_name(response: &Response, orig_url: &reqwest::Url) -> String {
fn from_header(response: &Response) -> Option<String> {
let quoted = Regex::new("filename=\"([^\"]*)\"").unwrap();
// Alternative form:
let unquoted = Regex::new("filename=([^;=\"]*)").unwrap();
// TODO: support "filename*" version
let header = response
.headers()
.get(CONTENT_DISPOSITION)?
.to_utf8_str()
.ok()?;
let caps = quoted
.captures(header)
.or_else(|| unquoted.captures(header))?;
Some(caps[1].to_string())
content_disposition::parse_filename_from_content_disposition(header)
}
fn from_url(url: &reqwest::Url) -> Option<String> {

View File

@ -2,6 +2,7 @@
mod auth;
mod buffer;
mod cli;
mod content_disposition;
mod decoder;
mod download;
mod error_reporting;

View File

@ -119,6 +119,77 @@ fn download_supplied_unicode_filename() {
);
}
/// A plain ASCII name carried in the extended `filename*` parameter is used
/// as the name of the downloaded file.
#[test]
fn download_support_filename_rfc_5987() {
    const DISPOSITION: &str = r#"attachment; filename*=UTF-8''abcd1234.txt"#;

    let workdir = tempdir().unwrap();
    let server = server::http(|_req| async move {
        hyper::Response::builder()
            .header("Content-Disposition", DISPOSITION)
            .body("file".into())
            .unwrap()
    });

    get_command()
        .args(["--download", &server.base_url()])
        .current_dir(&workdir)
        .assert()
        .success();

    let saved = fs::read_to_string(workdir.path().join("abcd1234.txt")).unwrap();
    assert_eq!(saved, "file");
}
/// A percent-encoded UTF-8 name in `filename*` is decoded before being used
/// as the name of the downloaded file.
#[test]
fn download_support_filename_rfc_5987_percent_encoded() {
    const DISPOSITION: &str = r#"attachment; filename*=UTF-8''%E6%B5%8B%E8%AF%95.txt"#;

    let workdir = tempdir().unwrap();
    let server = server::http(|_req| async move {
        hyper::Response::builder()
            .header("Content-Disposition", DISPOSITION)
            .body("file".into())
            .unwrap()
    });

    get_command()
        .args(["--download", &server.base_url()])
        .current_dir(&workdir)
        .assert()
        .success();

    let saved = fs::read_to_string(workdir.path().join("测试.txt")).unwrap();
    assert_eq!(saved, "file");
}
/// When a response carries both `filename` and `filename*`, the extended
/// parameter decides the name of the downloaded file.
#[test]
fn download_filename_star_with_high_priority() {
    const DISPOSITION: &str =
        r#"attachment; filename="fallback.txt"; filename*=UTF-8''%E6%B5%8B%E8%AF%95.txt"#;

    let workdir = tempdir().unwrap();
    let server = server::http(|_req| async move {
        hyper::Response::builder()
            .header("Content-Disposition", DISPOSITION)
            .body("file".into())
            .unwrap()
    });

    get_command()
        .args(["--download", &server.base_url()])
        .current_dir(&workdir)
        .assert()
        .success();

    let saved = fs::read_to_string(workdir.path().join("测试.txt")).unwrap();
    assert_eq!(saved, "file");
}
#[test]
fn download_supplied_unquoted_filename() {
let dir = tempdir().unwrap();