feat: parse inline span tags and css color attributes in them

This commit is contained in:
Matias Fontanini 2024-11-10 13:56:45 -08:00
parent 67b71bbef8
commit fb1ffcc996
7 changed files with 270 additions and 10 deletions

7
Cargo.lock generated
View File

@ -963,6 +963,7 @@ dependencies = [
"syntect",
"tempfile",
"thiserror",
"tl",
"unicode-width 0.2.0",
]
@ -1517,6 +1518,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tl"
version = "0.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b130bd8a58c163224b44e217b4239ca7b927d82bf6cc2fea1fc561d15056e3f7"
[[package]]
name = "typed-arena"
version = "2.0.2"

View File

@ -31,6 +31,7 @@ serde_json = "1.0"
serde_with = "3.6"
strum = { version = "0.26", features = ["derive"] }
tempfile = "3.10"
tl = "0.7"
console = "0.15.8"
thiserror = "1"
unicode-width = "0.2"

179
src/markdown/html.rs Normal file
View File

@ -0,0 +1,179 @@
use crate::style::{Color, ParseColorError, TextStyle};
use std::{borrow::Cow, str, str::Utf8Error};
use tl::Attributes;
/// Knobs that control how inline HTML is parsed.
pub(crate) struct HtmlParseOptions {
    /// When set, unsupported tag attributes and CSS properties are reported as errors
    /// rather than being skipped.
    pub(crate) strict: bool,
}

impl Default for HtmlParseOptions {
    /// Strict parsing is the default.
    fn default() -> Self {
        let strict = true;
        HtmlParseOptions { strict }
    }
}
/// Parser for the inline HTML tags that can show up inside markdown text.
///
/// The `Default` instance uses the default [`HtmlParseOptions`].
#[derive(Default)]
pub(crate) struct HtmlParser {
    // Consulted when an unsupported tag attribute or CSS property is found.
    options: HtmlParseOptions,
}
impl HtmlParser {
    /// Parse a single inline HTML tag.
    ///
    /// Only `span` tags are supported: a closing `</span>` yields [`HtmlInline::CloseSpan`] and an
    /// opening `<span ...>` yields [`HtmlInline::OpenSpan`] carrying the style described by its
    /// attributes.
    ///
    /// # Errors
    ///
    /// Returns a [`ParseHtmlError`] if the input is a closing tag other than `span`, contains no
    /// tag, is a tag other than `span`, or has attributes that can't be handled.
    pub(crate) fn parse(self, input: &str) -> Result<HtmlInline, ParseHtmlError> {
        if input.starts_with("</") {
            return if Self::is_closing_span(input) {
                Ok(HtmlInline::CloseSpan)
            } else {
                Err(ParseHtmlError::UnsupportedClosingTag(input.to_string()))
            };
        }
        let dom = tl::parse(input, Default::default())?;
        let top = dom.children().iter().next().ok_or(ParseHtmlError::NoTags)?;
        // Report a handle that can't be resolved as "no tags" rather than panicking on input
        // that ultimately comes from the user's document.
        let node = top.get(dom.parser()).ok_or(ParseHtmlError::NoTags)?;
        let tag = node.as_tag().ok_or(ParseHtmlError::NoTags)?;
        if tag.name().as_bytes() != b"span" {
            return Err(ParseHtmlError::UnsupportedHtml);
        }
        let style = self.parse_attributes(tag.attributes())?;
        Ok(HtmlInline::OpenSpan { style })
    }

    /// Check whether `input` is a closing `span` tag: `</span`, optional whitespace, then `>`.
    ///
    /// A plain prefix check is not enough as it would also accept tags such as `</spanner>`.
    fn is_closing_span(input: &str) -> bool {
        input.strip_prefix("</span").map(|rest| rest.trim()) == Some(">")
    }

    /// Build a [`TextStyle`] out of a tag's attributes.
    ///
    /// Only the `style` attribute is understood; any other attribute is an error in strict mode
    /// and is ignored otherwise. An attribute without a value is treated as an empty string.
    fn parse_attributes(&self, attributes: &Attributes) -> Result<TextStyle, ParseHtmlError> {
        let mut style = TextStyle::default();
        for (name, value) in attributes.iter() {
            let value = value.unwrap_or(Cow::Borrowed(""));
            match name.as_ref() {
                "style" => self.parse_css_attribute(&value, &mut style)?,
                _ if self.options.strict => {
                    return Err(ParseHtmlError::UnsupportedTagAttribute(name.to_string()));
                }
                _ => {}
            }
        }
        Ok(style)
    }

    /// Apply the `;` separated `key: value` declarations in a `style` attribute to `style`.
    ///
    /// Only `color` and `background-color` are understood; any other property is an error in
    /// strict mode and is ignored otherwise. Empty declarations (e.g. a trailing `;`) are skipped.
    fn parse_css_attribute(&self, attribute: &str, style: &mut TextStyle) -> Result<(), ParseHtmlError> {
        for declaration in attribute.split(';') {
            let declaration = declaration.trim();
            if declaration.is_empty() {
                continue;
            }
            let (key, value) = declaration.split_once(':').ok_or(ParseHtmlError::NoColonInAttribute)?;
            let key = key.trim();
            let value = value.trim();
            match key {
                "color" => *style = style.fg_color(Self::parse_color(value)?),
                "background-color" => *style = style.bg_color(Self::parse_color(value)?),
                _ if self.options.strict => {
                    return Err(ParseHtmlError::UnsupportedCssAttribute(key.into()));
                }
                _ => {}
            }
        }
        Ok(())
    }

    /// Parse a CSS color.
    ///
    /// RGB colors must be prefixed by `#` (e.g. `#ff0000`), whereas any other color must not be.
    fn parse_color(input: &str) -> Result<Color, ParseHtmlError> {
        match input.strip_prefix('#') {
            Some(hex) => {
                let color = hex.parse::<Color>()?;
                if matches!(color, Color::Rgb { .. }) {
                    Ok(color)
                } else {
                    // A '#' followed by something that isn't an rgb color (e.g. `#red`): parse
                    // the input as-is, '#' included, and let the color parser decide.
                    Ok(input.parse()?)
                }
            }
            None => {
                let color = input.parse::<Color>()?;
                if matches!(color, Color::Rgb { .. }) {
                    Err(ParseHtmlError::InvalidColor("missing '#' in rgb color".into()))
                } else {
                    Ok(color)
                }
            }
        }
    }
}
/// The outcome of parsing a single inline HTML tag.
#[derive(Debug)]
pub(crate) enum HtmlInline {
    /// An opening `span` tag, along with the style built from its attributes.
    OpenSpan { style: TextStyle },
    /// A closing `span` tag.
    CloseSpan,
}
/// An error encountered while parsing inline HTML.
#[derive(Debug, thiserror::Error)]
pub(crate) enum ParseHtmlError {
    /// The underlying HTML parser rejected the input.
    #[error("parsing html failed: {0}")]
    ParsingHtml(#[from] tl::ParseError),
    /// The input contained no tag.
    #[error("no html tags found")]
    NoTags,
    /// Some content was not valid UTF-8.
    #[error("non utf8 content: {0}")]
    NotUtf8(#[from] Utf8Error),
    /// A declaration inside a `style` attribute had no `:` separating key and value.
    #[error("attribute has no ':'")]
    NoColonInAttribute,
    /// A color value could not be parsed.
    #[error("invalid color: {0}")]
    InvalidColor(String),
    /// A CSS property other than the supported ones was found.
    #[error("invalid css attribute: {0}")]
    UnsupportedCssAttribute(String),
    /// A tag other than `span` was found.
    #[error("HTML can only contain span tags")]
    UnsupportedHtml,
    /// A tag attribute other than the supported ones was found.
    #[error("unsupported tag attribute: {0}")]
    UnsupportedTagAttribute(String),
    /// A closing tag other than `span` was found.
    #[error("unsupported closing tag: {0}")]
    UnsupportedClosingTag(String),
}
impl From<ParseColorError> for ParseHtmlError {
fn from(e: ParseColorError) -> Self {
Self::InvalidColor(e.to_string())
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::style::Color;
    use rstest::rstest;

    /// An opening span carries the colors declared in its `style` attribute.
    #[test]
    fn parse() {
        let tag =
            HtmlParser::default().parse(r#"<span style="color: red; background-color: black">"#).expect("parse failed");
        let HtmlInline::OpenSpan { style } = tag else { panic!("not an open tag") };
        assert_eq!(style, TextStyle::default().bg_color(Color::Black).fg_color(Color::Red));
    }

    /// A closing span is recognized as such.
    #[test]
    fn parse_end_tag() {
        let tag = HtmlParser::default().parse("</span>").expect("parse failed");
        assert!(matches!(tag, HtmlInline::CloseSpan));
    }

    /// Unsupported tags, attributes, CSS properties, and colors are rejected.
    ///
    /// Every input is a well-formed tag so that each case fails for the reason its name states
    /// rather than because of a missing `>`.
    #[rstest]
    #[case::invalid_start_tag("<div>")]
    #[case::invalid_end_tag("</div>")]
    #[case::invalid_attribute("<span foo=\"bar\">")]
    #[case::invalid_css_attribute("<span style=\"bleh: 42\">")]
    #[case::invalid_color("<span style=\"color: 42\">")]
    fn parse_invalid_html(#[case] input: &str) {
        HtmlParser::default().parse(input).expect_err("parse succeeded");
    }

    /// RGB colors need a `#` prefix, other colors are used as-is.
    #[rstest]
    #[case::rgb("#ff0000", Color::Rgb{r: 255, g: 0, b: 0})]
    #[case::red("red", Color::Red)]
    fn parse_color(#[case] input: &str, #[case] expected: Color) {
        let color: Color = HtmlParser::parse_color(input).expect("parse failed");
        assert_eq!(color, expected);
    }

    /// An RGB color without `#` and a non-RGB color with `#` are both invalid.
    #[rstest]
    #[case::rgb("ff0000")]
    #[case::red("#red")]
    fn parse_invalid_color(#[case] input: &str) {
        HtmlParser::parse_color(input).expect_err("parse succeeded");
    }
}

View File

@ -1,3 +1,4 @@
pub(crate) mod elements;
pub(crate) mod html;
pub(crate) mod parse;
pub(crate) mod text;

View File

@ -1,4 +1,7 @@
use super::elements::SourcePosition;
use super::{
elements::SourcePosition,
html::{HtmlInline, HtmlParser, ParseHtmlError},
};
use crate::{
markdown::elements::{ListItem, ListItemType, MarkdownElement, Table, TableRow, Text, TextBlock},
style::TextStyle,
@ -100,7 +103,7 @@ impl<'a> MarkdownParser<'a> {
NodeValue::Table(_) => self.parse_table(node)?,
NodeValue::CodeBlock(block) => Self::parse_code_block(block, data.sourcepos)?,
NodeValue::ThematicBreak => MarkdownElement::ThematicBreak,
NodeValue::HtmlBlock(block) => Self::parse_html_block(block, data.sourcepos)?,
NodeValue::HtmlBlock(block) => self.parse_html_block(block, data.sourcepos)?,
NodeValue::BlockQuote | NodeValue::MultilineBlockQuote(_) => self.parse_block_quote(node)?,
other => return Err(ParseErrorKind::UnsupportedElement(other.identifier()).with_sourcepos(data.sourcepos)),
};
@ -119,7 +122,7 @@ impl<'a> MarkdownParser<'a> {
Ok(MarkdownElement::FrontMatter(contents.into()))
}
fn parse_html_block(block: &NodeHtmlBlock, sourcepos: Sourcepos) -> ParseResult<MarkdownElement> {
fn parse_html_block(&self, block: &NodeHtmlBlock, sourcepos: Sourcepos) -> ParseResult<MarkdownElement> {
let block = block.literal.trim();
let start_tag = "<!--";
let end_tag = "-->";
@ -333,7 +336,12 @@ impl<'a> InlinesParser<'a> {
}
}
fn process_node(&mut self, node: &'a AstNode<'a>, parent: &'a AstNode<'a>, style: TextStyle) -> ParseResult<()> {
fn process_node(
&mut self,
node: &'a AstNode<'a>,
parent: &'a AstNode<'a>,
style: TextStyle,
) -> ParseResult<Option<HtmlStyle>> {
let data = node.data.borrow();
match &data.value {
NodeValue::Text(text) => {
@ -350,7 +358,7 @@ impl<'a> InlinesParser<'a> {
SoftBreak::Newline => {
self.store_pending_text();
}
SoftBreak::Space => self.pending_text.push(Text::from(" ")),
SoftBreak::Space => self.pending_text.push(Text::new(" ", style)),
};
}
NodeValue::Link(link) => {
@ -376,7 +384,7 @@ impl<'a> InlinesParser<'a> {
NodeValue::Image(link) => {
if matches!(self.stringify_images, StringifyImages::Yes) {
self.pending_text.push(Text::from(format!("![{}]({})", link.title, link.url)));
return Ok(());
return Ok(None);
}
self.store_pending_text();
@ -419,22 +427,49 @@ impl<'a> InlinesParser<'a> {
};
self.process_children(node, style)?;
}
NodeValue::HtmlInline(html) => {
let html_inline = HtmlParser::default()
.parse(html)
.map_err(|e| ParseErrorKind::InvalidHtml(e).with_sourcepos(data.sourcepos))?;
match html_inline {
HtmlInline::OpenSpan { style } => return Ok(Some(HtmlStyle::Add(style))),
HtmlInline::CloseSpan => return Ok(Some(HtmlStyle::Remove)),
};
}
other => {
return Err(ParseErrorKind::UnsupportedStructure { container: "text", element: other.identifier() }
.with_sourcepos(data.sourcepos));
}
};
Ok(())
Ok(None)
}
fn process_children(&mut self, root: &'a AstNode<'a>, style: TextStyle) -> ParseResult<()> {
fn process_children(&mut self, root: &'a AstNode<'a>, base_style: TextStyle) -> ParseResult<()> {
let mut html_styles = Vec::new();
let mut style = base_style;
for node in root.children() {
self.process_node(node, root, style)?;
if let Some(html_style) = self.process_node(node, root, style)? {
match html_style {
HtmlStyle::Add(style) => html_styles.push(style),
HtmlStyle::Remove => {
html_styles.pop();
}
};
style = base_style;
for html_style in html_styles.iter().rev() {
style.merge(html_style);
}
}
}
Ok(())
}
}
/// A change to the active HTML styles, produced when an inline HTML tag is processed.
enum HtmlStyle {
    /// An opening tag was found: its style is pushed onto the list of active styles.
    Add(TextStyle),
    /// A closing tag was found: the most recently pushed style is popped.
    Remove,
}
enum Inline {
Text(TextBlock),
Image { path: String, title: String },
@ -485,6 +520,9 @@ pub(crate) enum ParseErrorKind {
/// We don't support unfenced code blocks.
UnfencedCodeBlock,
/// Invalid HTML was found.
InvalidHtml(ParseHtmlError),
/// An internal parsing error.
Internal(String),
}
@ -497,6 +535,7 @@ impl Display for ParseErrorKind {
write!(f, "unsupported structure in {container}: {element}")
}
Self::UnfencedCodeBlock => write!(f, "only fenced code blocks are supported"),
Self::InvalidHtml(inner) => write!(f, "invalid HTML: {inner}"),
Self::Internal(message) => write!(f, "internal error: {message}"),
}
}
@ -559,6 +598,8 @@ impl Identifier for NodeValue {
#[cfg(test)]
mod test {
use crate::style::Color;
use super::*;
use rstest::rstest;
use std::path::Path;
@ -613,6 +654,23 @@ boop
assert_eq!(elements, expected_elements);
}
/// Nested spans combine their styles, with the innermost span's colors taking precedence.
#[test]
fn html_inlines() {
    let input = "hi<span style=\"color: red\">red<span style=\"background-color: blue\">blue<span style=\"color: yellow\">yellow</span></span></span>";
    let parsed = parse_single(input);
    let MarkdownElement::Paragraph(elements) = parsed else { panic!("not a paragraph: {parsed:?}") };

    let plain = Text::from("hi");
    let red = Text::new("red", TextStyle::default().fg_color(Color::Red));
    let blue = Text::new("blue", TextStyle::default().fg_color(Color::Red).bg_color(Color::Blue));
    let yellow = Text::new("yellow", TextStyle::default().fg_color(Color::Yellow).bg_color(Color::Blue));

    let expected_elements = &[TextBlock(vec![plain, red, blue, yellow])];
    assert_eq!(elements, expected_elements);
}
#[test]
fn link_wo_label_wo_title() {
let parsed = parse_single("my [](https://example.com)");

View File

@ -162,7 +162,9 @@ where
let positioning = layout.compute(dimensions, text.width() as u16);
let prefix = "".into();
let text_drawer = TextDrawer::new(&prefix, 0, text, positioning, &self.colors)?;
text_drawer.draw(self.terminal)
text_drawer.draw(self.terminal)?;
// Restore colors
self.apply_colors()
}
fn render_line_break(&mut self) -> RenderResult {

View File

@ -65,6 +65,18 @@ impl TextStyle {
self
}
/// Use the given color as this text style's background, returning the updated style.
pub(crate) fn bg_color(self, color: Color) -> Self {
    let mut styled = self;
    styled.colors.background = Some(color);
    styled
}
/// Use the given color as this text style's foreground, returning the updated style.
pub(crate) fn fg_color(self, color: Color) -> Self {
    let mut styled = self;
    styled.colors.foreground = Some(color);
    styled
}
/// Check whether this text style is bold.
pub(crate) fn is_bold(&self) -> bool {
self.has_flag(TextFormatFlags::Bold)