mirror of
https://github.com/nushell/nushell.git
synced 2025-05-25 09:01:17 +00:00
* add table selector for downloading web tables * type-o * updated debug mode to inspect mode
209 lines
6.3 KiB
Rust
209 lines
6.3 KiB
Rust
use crate::Table;
|
|
use nipper::Document;
|
|
use nu_protocol::{value::StringExt, Primitive, TaggedDictBuilder, UntaggedValue, Value};
|
|
use nu_source::Tag;
|
|
|
|
pub struct Selector {
|
|
pub query: String,
|
|
pub tag: Tag,
|
|
pub as_html: bool,
|
|
pub attribute: String,
|
|
pub as_table: Value,
|
|
pub inspect: bool,
|
|
}
|
|
|
|
impl Selector {
|
|
pub fn new() -> Selector {
|
|
Selector {
|
|
query: String::new(),
|
|
tag: Tag::unknown(),
|
|
as_html: false,
|
|
attribute: String::new(),
|
|
as_table: Value::new(
|
|
UntaggedValue::Primitive(Primitive::String("".to_string())),
|
|
Tag::unknown(),
|
|
),
|
|
inspect: false,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Default for Selector {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
pub fn begin_selector_query(input_html: String, selector: &Selector) -> Vec<Value> {
|
|
if selector.as_table.is_some() {
|
|
retrieve_tables(input_html.as_str(), &selector.as_table, selector.inspect)
|
|
} else {
|
|
match selector.attribute.is_empty() {
|
|
true => execute_selector_query(
|
|
input_html.as_str(),
|
|
selector.query.as_str(),
|
|
selector.as_html,
|
|
),
|
|
false => execute_selector_query_with_attribute(
|
|
input_html.as_str(),
|
|
selector.query.as_str(),
|
|
selector.attribute.as_str(),
|
|
),
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn retrieve_tables(input_string: &str, columns: &Value, inspect_mode: bool) -> Vec<Value> {
|
|
let html = input_string;
|
|
let mut cols = Vec::new();
|
|
if let UntaggedValue::Table(t) = &columns.value {
|
|
for x in t {
|
|
cols.push(x.convert_to_string());
|
|
}
|
|
}
|
|
|
|
if inspect_mode {
|
|
eprintln!("Passed in Column Headers = {:#?}", &cols,);
|
|
}
|
|
|
|
let mut table = match Table::find_by_headers(html, &cols) {
|
|
Some(t) => {
|
|
if inspect_mode {
|
|
eprintln!("Table Found = {:#?}", &t);
|
|
}
|
|
t
|
|
}
|
|
None => Table::empty(),
|
|
};
|
|
|
|
let mut table_out = Vec::new();
|
|
|
|
// since cols was empty and headers is not, it means that headers were manually populated
|
|
// so let's fake the data in order to build a proper table. this situation happens when
|
|
// there are tables where the first column is actually the headers. kind of like a table
|
|
// that has been rotated ccw 90 degrees
|
|
if cols.is_empty() && !table.headers().is_empty() {
|
|
for col in table.headers().keys() {
|
|
cols.push(col.to_string());
|
|
}
|
|
|
|
let mut data2 = Vec::new();
|
|
for x in &table.data {
|
|
data2.push(x.join(", "));
|
|
}
|
|
// eprintln!("data2={:?}", data2);
|
|
table.data = vec![data2];
|
|
}
|
|
|
|
// if columns are still empty, let's just make a single column table with the data
|
|
if cols.is_empty() {
|
|
let table_with_no_empties: Vec<_> = table.iter().filter(|item| !item.is_empty()).collect();
|
|
|
|
for row in &table_with_no_empties {
|
|
let mut dict = TaggedDictBuilder::new(Tag::unknown());
|
|
for (counter, cell) in row.iter().enumerate() {
|
|
let col_name = format!("Column{}", counter);
|
|
dict.insert_value(
|
|
col_name,
|
|
UntaggedValue::Primitive(Primitive::String(cell.to_string()))
|
|
.into_value(Tag::unknown()),
|
|
);
|
|
}
|
|
table_out.push(dict.into_value());
|
|
}
|
|
} else {
|
|
for row in &table {
|
|
let mut dict = TaggedDictBuilder::new(Tag::unknown());
|
|
// eprintln!("row={:?}", &row);
|
|
for col in &cols {
|
|
// eprintln!("col={:?}", &col);
|
|
let key = col.to_string();
|
|
let val = row
|
|
.get(col)
|
|
.unwrap_or(&format!("Missing column: '{}'", &col))
|
|
.to_string();
|
|
dict.insert_value(
|
|
key,
|
|
UntaggedValue::Primitive(Primitive::String(val)).into_value(Tag::unknown()),
|
|
);
|
|
}
|
|
table_out.push(dict.into_value());
|
|
}
|
|
}
|
|
table_out
|
|
}
|
|
|
|
fn execute_selector_query_with_attribute(
|
|
input_string: &str,
|
|
query_string: &str,
|
|
attribute: &str,
|
|
) -> Vec<Value> {
|
|
let doc = Document::from(input_string);
|
|
|
|
doc.select(query_string)
|
|
.iter()
|
|
.map(|selection| {
|
|
selection
|
|
.attr_or(attribute, "")
|
|
.to_string()
|
|
.to_string_value_create_tag()
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
fn execute_selector_query(input_string: &str, query_string: &str, as_html: bool) -> Vec<Value> {
|
|
let doc = Document::from(input_string);
|
|
|
|
match as_html {
|
|
true => doc
|
|
.select(query_string)
|
|
.iter()
|
|
.map(|selection| selection.html().to_string().to_string_value_create_tag())
|
|
.collect(),
|
|
false => doc
|
|
.select(query_string)
|
|
.iter()
|
|
.map(|selection| selection.text().to_string().to_string_value_create_tag())
|
|
.collect(),
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use nipper::Document;

    /// A bare fragment is rendered back wrapped in html/head/body elements.
    #[test]
    fn create_document_from_string() {
        let fragment = r#"<div name="foo" value="bar"></div>"#;
        let expected =
            r#"<html><head></head><body><div name="foo" value="bar"></div></body></html>"#;

        let document = Document::from(fragment);
        let rendered = document.html().to_string();

        assert_eq!(expected.to_string(), rendered);
    }

    /// Adding and removing attributes on a selection leaves the untouched
    /// `value` attribute readable.
    #[test]
    fn modify_html_document() {
        let fragment = r#"<div name="foo" value="bar"></div>"#;
        let document = Document::from(fragment);

        let mut div = document.select(r#"div[name="foo"]"#);
        div.set_attr("id", "input");
        div.remove_attr("name");

        let expected = "bar".to_string();
        let found = div.attr("value").unwrap().to_string();

        assert_eq!(expected, found);
    }

    // Disabled test, kept for reference: it fetches a live page over the network.
    //
    // #[test]
    // fn test_hacker_news() -> Result<(), ShellError> {
    //     let html = reqwest::blocking::get("https://news.ycombinator.com")?.text()?;
    //     let document = Document::from(&html);
    //     let result = query(html, ".hnname a".to_string(), Tag::unknown());
    //     let shouldbe = Ok(vec!["Hacker News".to_str_value_create_tag()]);
    //     assert_eq!(shouldbe, result);
    //     Ok(())
    // }
}
|