我一直认为手写文本解析器很难,但在体验 winnow 之后,发现手写解析器其实值得上手。这里列举 winnow 的三个优点:
- 可复用性强
- 可解析流
- 易于测试
下面的代码展示了如何使用 winnow 解析 lux 输出和进度条。
use std::time::Duration;
use winnow::{
ascii::{float, line_ending, not_line_ending, space0, space1},
combinator::{alt, delimited, opt, repeat, seq, terminated},
prelude::*,
stream::Partial,
token::{tag, take_until1},
};
/// Byte-slice input wrapped in winnow's `Partial` stream marker; this is the
/// input type taken by the size, time and progress parsers in this file.
pub type InputStream<'i> = Partial<&'i [u8]>;
/// A data size: a floating-point magnitude tagged with the unit it was written in.
///
/// `KiB`/`MiB`/`GiB`/`TiB` are binary units (powers of 1024), while
/// `KB`/`MB`/`GB`/`TB` are decimal units (powers of 1000).
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Size {
    /// Bytes.
    B(f64),
    /// Kibibytes (1024 bytes).
    KiB(f64),
    /// Mebibytes (1024^2 bytes).
    MiB(f64),
    /// Gibibytes (1024^3 bytes).
    GiB(f64),
    /// Tebibytes (1024^4 bytes).
    TiB(f64),
    /// Kilobytes (10^3 bytes).
    KB(f64),
    /// Megabytes (10^6 bytes).
    MB(f64),
    /// Gigabytes (10^9 bytes).
    GB(f64),
    /// Terabytes (10^12 bytes).
    TB(f64),
}

impl Size {
    /// Splits the value into `(magnitude, unit label, bytes per unit)`.
    ///
    /// Keeping the unit table in one place guarantees that the byte
    /// conversion and the textual rendering can never disagree.
    fn parts(&self) -> (f64, &'static str, f64) {
        match *self {
            Size::B(size) => (size, "B", 1.0),
            Size::KiB(size) => (size, "KiB", 1024.0),
            Size::MiB(size) => (size, "MiB", 1048576.0),
            Size::GiB(size) => (size, "GiB", 1073741824.0),
            Size::TiB(size) => (size, "TiB", 1099511627776.0),
            Size::KB(size) => (size, "KB", 1e3),
            Size::MB(size) => (size, "MB", 1e6),
            Size::GB(size) => (size, "GB", 1e9),
            Size::TB(size) => (size, "TB", 1e12),
        }
    }

    /// Returns the size expressed in bytes.
    fn to_f64(&self) -> f64 {
        let (magnitude, _, bytes_per_unit) = self.parts();
        magnitude * bytes_per_unit
    }
}

/// Renders the size with two decimals followed by its unit, e.g. `16.40 MiB`.
///
/// Implementing `Display` (rather than an inherent `to_string`) keeps
/// `size.to_string()` working through the blanket `ToString` impl and also
/// makes `Size` usable with `{}` in `format!`/`println!`.
impl std::fmt::Display for Size {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (magnitude, unit, _) = self.parts();
        write!(f, "{:.2} {}", magnitude, unit)
    }
}
/// Parse unit string such as `MiB` to SizeUnit
pub fn size(input: &mut InputStream) -> PResult<Size> {
alt((
terminated(float, (space0, "B")).map(Size::B),
terminated(float, (space0, "KiB")).map(Size::KiB),
terminated(float, (space0, "MiB")).map(Size::MiB),
terminated(float, (space0, "GiB")).map(Size::GiB),
terminated(float, (space0, "TiB")).map(Size::TiB),
terminated(float, (space0, "KB")).map(Size::KB),
terminated(float, (space0, "MB")).map(Size::MB),
terminated(float, (space0, "GB")).map(Size::GB),
terminated(float, (space0, "TB")).map(Size::TB),
))
.parse_next(input)
}
/// Parse either a concrete size (e.g. `16.40 MiB`) or the placeholder `?`.
///
/// Yields `Some(size)` for a concrete size and `None` for `?`.
pub fn optional_size(input: &mut InputStream) -> PResult<Option<Size>> {
    let known = size.map(Some);
    let unknown = tag("?").value(None);
    alt((known, unknown)).parse_next(input)
}
/// A duration split into optional per-unit components.
///
/// A field is `None` when that unit did not appear in the parsed text.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct Time {
    years: Option<f64>,
    months: Option<f64>,
    weeks: Option<f64>,
    days: Option<f64>,
    hours: Option<f64>,
    minutes: Option<f64>,
    seconds: Option<f64>,
}

impl Time {
    /// Returns `true` when at least one component is present.
    fn is_valid(&self) -> bool {
        [
            self.years,
            self.months,
            self.weeks,
            self.days,
            self.hours,
            self.minutes,
            self.seconds,
        ]
        .iter()
        .any(Option::is_some)
    }

    /// Converts the components into a single `Duration`.
    ///
    /// A year counts as 365.25 days and a month as 30.44 days. Fractional
    /// seconds are preserved. The conversion never panics: a negative or NaN
    /// total yields `Duration::ZERO`, and a total too large for `Duration`
    /// saturates at `u64::MAX` whole seconds.
    pub fn to_duration(&self) -> Duration {
        let secs = self.years.unwrap_or(0.0) * 365.25 * 24.0 * 3600.0
            + self.months.unwrap_or(0.0) * 30.44 * 24.0 * 3600.0
            + self.weeks.unwrap_or(0.0) * 7.0 * 24.0 * 3600.0
            + self.days.unwrap_or(0.0) * 24.0 * 3600.0
            + self.hours.unwrap_or(0.0) * 3600.0
            + self.minutes.unwrap_or(0.0) * 60.0
            + self.seconds.unwrap_or(0.0);

        // `Duration::from_secs_f64` panics on negative, non-finite or
        // overflowing input, so those cases are clamped explicitly.
        if secs.is_nan() || secs <= 0.0 {
            Duration::ZERO
        } else if secs >= u64::MAX as f64 {
            Duration::new(u64::MAX, 0)
        } else {
            Duration::from_secs_f64(secs)
        }
    }
}
/// Parse a duration string such as `2h3m1s` into a `Time`.
///
/// Each component is a float followed by a one-letter unit suffix. Components
/// are all optional but are tried in this fixed order: `y`, `M` (months),
/// `w`, `d`, `h`, `m` (minutes), `s`. Parsing stops at the first text that
/// does not fit the next expected component.
///
/// # Errors
///
/// Fails when no component at all could be parsed.
#[rustfmt::skip]
pub fn time(input: &mut InputStream) -> PResult<Time> {
seq! {Time {
years: opt(terminated(float, "y")),
// Upper-case `M` is months; lower-case `m` below is minutes.
months: opt(terminated(float, "M")),
weeks: opt(terminated(float, "w")),
days: opt(terminated(float, "d")),
hours: opt(terminated(float, "h")),
minutes: opt(terminated(float, "m")),
seconds: opt(terminated(float, "s"))
}}
// Every field is optional, so reject the result where nothing matched.
.verify(|t| t.is_valid())
.parse_next(input)
}
/// Parse either a concrete duration (e.g. `3h2s`) or the placeholder `?`.
///
/// Yields `Some(time)` for a concrete duration and `None` for `?`.
pub fn optional_time(input: &mut InputStream) -> PResult<Option<Time>> {
    let known = time.map(Some);
    let unknown = tag("?").value(None);
    alt((known, unknown)).parse_next(input)
}
/// Parse a percentage such as `10.3%`, returning the number without the `%` sign.
pub fn persent(input: &mut InputStream) -> PResult<f64> {
    (float, "%")
        .map(|(value, _): (f64, _)| value)
        .parse_next(input)
}
/// One parsed progress line, e.g.
/// `15.61 MiB / 16.40 MiB [============>--] 1.12 MiB p/s 95.19% 0s`.
#[derive(Debug, Clone, PartialEq)]
pub struct Progress {
/// Size written before the `/` (the finished amount).
pub pfinished: Size,
/// Size written after the `/` (the total amount).
pub ptotal: Size,
/// Size written before `p/s`; `None` when it is printed as `?`.
pub pspeed: Option<Size>,
/// Percentage value, without the `%` sign.
pub ppersent: f64,
/// Trailing time field; `None` when it is printed as `?`.
pub pleft: Option<Time>,
}
/// Parse a progress line like
/// `15.61 MiB / 16.40 MiB [============>--] 1.12 MiB p/s 95.19% 0s`.
///
/// Fields are expected in this order, separated by spaces: finished size,
/// `/`, total size, a bracketed progress bar, speed followed by `p/s`,
/// percentage, and time. Speed and time may each be `?`, which yields `None`.
///
/// # Errors
///
/// Fails when any field is missing or malformed.
pub fn progress(input: &mut InputStream) -> PResult<Progress> {
seq! {Progress {
pfinished: size,
_: (space1, "/", space1),
ptotal: size,
// The bar's contents are skipped; it must hold at least one byte before `]`.
_: (space1, ("[", take_until1("]"), "]"), space1),
pspeed: terminated(optional_size, (space0, "p/s")),
_: space1,
ppersent: persent,
_: space1,
pleft: optional_time
}}
.parse_next(input)
}
/// A progress line whose size, speed and time fields are kept as the text
/// that was matched instead of being converted to typed values.
#[derive(Debug, PartialEq)]
pub struct PureProgress {
/// Matched text of the size before the `/`, e.g. `15.61 MiB`.
pub pfinished: String,
/// Matched text of the size after the `/`, e.g. `16.40 MiB`.
pub ptotal: String,
/// Matched text of the speed including its `p/s` suffix, e.g. `1.12 MiB p/s`.
pub pspeed: String,
/// Percentage value, without the `%` sign.
pub ppersent: f64,
/// Matched text of the trailing time field, e.g. `0s`.
pub pleft: String,
}
/// Convert raw bytes to an owned `String`, replacing invalid UTF-8 sequences
/// with U+FFFD (the replacement character).
fn bytes_to_string(bytes: &[u8]) -> String {
    // `into_owned` reuses the buffer when the lossy conversion already had to
    // allocate, instead of formatting the `Cow` into a second `String`.
    String::from_utf8_lossy(bytes).into_owned()
}
/// Parse a progress line with the same layout as `progress`, but keep the
/// matched text of each size, speed and time field as a `String`.
///
/// The fields are still validated by the typed parsers; `recognize` then
/// returns the bytes they consumed. Only the percentage is returned as a number.
///
/// # Errors
///
/// Fails when any field is missing or malformed.
pub fn pure_progress(input: &mut InputStream) -> PResult<PureProgress> {
seq! {PureProgress {
pfinished: size.recognize().map(bytes_to_string),
_: (space1, "/", space1),
ptotal: size.recognize().map(bytes_to_string),
_: (space1, "[", take_until1("]"), "]", space1),
// `recognize` wraps the whole `terminated`, so the text includes `p/s`.
pspeed: terminated(optional_size, (space0, "p/s")).recognize().map(bytes_to_string),
_: space1,
ppersent: persent,
_: space1,
pleft: optional_time.recognize().map(bytes_to_string),
}}
.parse_next(input)
}
/// One entry of the stream list printed in the info output.
#[derive(Debug, Clone, PartialEq)]
pub struct Stream {
/// Text between the square brackets on the entry's first line.
pub sid: String,
/// Remainder of the `Quality:` line.
pub squality: String,
/// Number printed before `MiB` on the `Size:` line.
pub ssize: f64,
/// Number printed before `Bytes` inside the parentheses on the `Size:` line.
pub ssize_bytes: f64,
}
/// Parse one stream entry from lux stdout.
///
/// An entry consists of four lines: the bracketed id followed by dashes, the
/// `Quality:` line, the `Size:` line (a `MiB` value and a `Bytes` value in
/// parentheses), and the `# download with: lux -f <id> ...` hint. Two line
/// endings must follow the hint. Unlike the progress parsers, this takes a
/// plain byte slice rather than `InputStream`.
///
/// # Errors
///
/// Fails when any of these lines is missing or does not match.
fn stream(input: &mut &[u8]) -> PResult<Stream> {
seq! {Stream {
sid: delimited(" [", take_until1("]"), "] -------------------").map(bytes_to_string),
_: line_ending,
squality: delimited(" Quality: ", not_line_ending, line_ending).map(bytes_to_string),
// The size unit is matched literally as `MiB`.
ssize: delimited(" Size: ", float, " MiB "),
ssize_bytes: delimited("(", float, " Bytes)"),
_: line_ending,
// The id repeated in the download hint is skipped, not compared with `sid`.
_: delimited(" # download with: lux -f ", take_until1(" "), " ..."),
_: (line_ending, line_ending)
}}
.parse_next(input)
}
/// The header section of the info output.
#[derive(Debug, Clone, PartialEq)]
pub struct Header {
/// Remainder of the `Site:` line.
pub hsite: String,
/// Remainder of the `Title:` line.
pub htitle: String,
/// Remainder of the `Type:` line.
pub htype: String,
}
/// Parse the info header from lux stdout.
///
/// Expects a leading line ending and then the `Site:`, `Title:` and `Type:`
/// lines, in that order; the rest of each line becomes the field value.
/// Takes a plain byte slice rather than `InputStream`.
///
/// # Errors
///
/// Fails when a line is missing or appears out of order.
fn header(input: &mut &[u8]) -> PResult<Header> {
seq! {Header {
_: line_ending,
hsite: delimited(" Site: ", not_line_ending, line_ending).map(bytes_to_string),
htitle: delimited(" Title: ", not_line_ending, line_ending).map(bytes_to_string),
htype: delimited(" Type: ", not_line_ending, line_ending).map(bytes_to_string),
}}
.parse_next(input)
}
/// The complete parsed info output: a header plus its stream entries.
#[derive(Debug, Clone, PartialEq)]
pub struct Info {
/// Site, title and type lines.
pub iheader: Header,
/// Stream entries in the order they were printed; the parser requires at least one.
pub istreams: Vec<Stream>,
}
/// Parse the complete info output from lux stdout.
///
/// The input is the header, then the `Streams: # All available quality`
/// line, then one or more stream entries. Takes a plain byte slice rather
/// than `InputStream`.
///
/// # Errors
///
/// Fails when the header, the `Streams:` line, or the first stream entry
/// does not match.
pub fn info(input: &mut &[u8]) -> PResult<Info> {
seq! {Info {
iheader: header,
_: " Streams: # All available quality",
_: line_ending,
// At least one stream entry is required.
istreams: repeat(1.., stream),
}}
.parse_next(input)
}
// Unit tests for the parsers and the `Size`/`Time` helpers above.
#[cfg(test)]
mod tests {
use super::*;
// `size` accepts integer and negative magnitudes and rejects an unknown unit.
#[test]
fn test_size() {
assert_eq!(
size.parse_next(&mut Partial::new(b"10 MiB")),
Ok(Size::MiB(10.0))
);
assert!(size
.parse_next(&mut Partial::new(b"-10.000 MiB"))
.is_ok());
assert!(size.parse_next(&mut Partial::new(b"10 MiiB")).is_err());
}
// Byte conversion and two-decimal text rendering of a `Size`.
#[test]
fn test_size_to() {
let size = Size::MiB(1024.0);
assert_eq!(size.to_f64(), 1024.0 * 1024.0 * 1024.0);
assert_eq!(size.to_string(), "1024.00 MiB");
}
// `optional_size` wraps a concrete size in `Some` and rejects an unknown unit.
#[test]
fn test_optional_size() {
assert_eq!(
optional_size.parse_next(&mut Partial::new(b"10 MiB")),
Ok(Some(Size::MiB(10.0)))
);
assert!(optional_size
.parse_next(&mut Partial::new(b"10 MiiB"))
.is_err());
}
// `persent` returns the number in front of the `%` sign.
#[test]
fn test_persent() {
assert_eq!(persent.parse_next(&mut Partial::new(b"20.3%")), Ok(20.3));
}
// Components may be skipped but are matched in a fixed order, so for
// `10m10h` only the minutes are parsed and the hours stay `None`.
#[test]
fn test_time() {
assert_eq!(
time.parse_next(&mut Partial::new(b"10h20s")),
Ok(Time {
hours: Some(10.0),
seconds: Some(20.0),
..Default::default()
})
);
assert_eq!(
time.parse_next(&mut Partial::new(b"10m10h")),
Ok(Time {
minutes: Some(10.0),
..Default::default()
})
);
}
// Converting a `Time` with only hours set into a `Duration`.
#[test]
fn test_time_to() {
let time = Time {
hours: Some(100.0),
..Default::default()
};
assert_eq!(time.to_duration(), Duration::from_secs_f64(100.0 * 3600.0));
}
// `?` maps to `None`; empty input is an error for `time`.
#[test]
fn test_optional_time() {
assert_eq!(
optional_time.parse_next(&mut Partial::new(b"10s")),
Ok(Some(Time {
seconds: Some(10.0),
..Default::default()
}))
);
assert_eq!(optional_time.parse_next(&mut Partial::new(b"?")), Ok(None));
assert!(time.parse_next(&mut Partial::new(b"")).is_err());
}
// A progress line in which every field has a concrete value.
#[test]
fn test_progress() {
assert_eq!(
progress.parse_next(&mut Partial::new(
b"15.61 MiB / 16.40 MiB [============>--] 1.12 MiB p/s 95.19% 0s"
)),
Ok(Progress {
pfinished: Size::MiB(15.61),
ptotal: Size::MiB(16.40),
pspeed: Some(Size::MiB(1.12)),
ppersent: 95.19,
pleft: Some(Time {
seconds: Some(0.0),
..Default::default()
})
})
)
}
// A progress line whose speed and time are printed as `?`.
#[test]
fn test_progress_2() {
assert_eq!(
progress.parse_next(&mut Partial::new(
b"0 MiB / 16.40 MiB [============>--] ? p/s 0.0% ?"
)),
Ok(Progress {
pfinished: Size::MiB(0.0),
ptotal: Size::MiB(16.40),
pspeed: None,
ppersent: 0.0,
pleft: None
})
)
}
// `pure_progress` keeps the matched text; the speed text includes ` p/s`.
#[test]
fn test_pure_progress() {
assert_eq!(
pure_progress.parse_next(&mut Partial::new(
b"15.61 MiB / 16.40 MiB [============>--] 1.12 MiB p/s 95.19% 0s"
)),
Ok(PureProgress {
pfinished: "15.61 MiB".to_string(),
ptotal: "16.40 MiB".to_string(),
pspeed: "1.12 MiB p/s".to_string(),
ppersent: 95.19,
pleft: "0s".to_string()
})
)
}
// Header with ASCII-only values.
#[test]
fn test_header() {
let header_text = r#"
Site: Universal
Title: th
Type: image/jpeg
"#;
assert_eq!(
header.parse_next(&mut header_text.as_bytes()),
Ok(Header {
hsite: "Universal".to_owned(),
htitle: "th".to_owned(),
htype: "image/jpeg".to_owned(),
})
)
}
// A single stream entry.
#[test]
fn test_stream() {
let stream_text = r#" [default] -------------------
Quality: default
Size: 0.01 MiB (8106 Bytes)
# download with: lux -f default ...
"#;
assert_eq!(
stream.parse_next(&mut stream_text.as_bytes()),
Ok(Stream {
sid: "default".to_owned(),
squality: "default".to_owned(),
ssize: 0.01,
ssize_bytes: 8106.0
})
)
}
// Full info output with one stream entry.
#[test]
fn test_info() {
let info_text = r#"
Site: Universal
Title: th
Type: image/jpeg
Streams: # All available quality
[default] -------------------
Quality: default
Size: 0.01 MiB (8106 Bytes)
# download with: lux -f default ...
"#;
assert_eq!(
info.parse_next(&mut info_text.as_bytes()),
Ok(Info {
iheader: Header {
hsite: "Universal".to_owned(),
htitle: "th".to_owned(),
htype: "image/jpeg".to_owned(),
},
istreams: vec![Stream {
sid: "default".to_owned(),
squality: "default".to_owned(),
ssize: 0.01,
ssize_bytes: 8106.0
}]
})
)
}
// Full info output with non-ASCII text and four stream entries.
#[test]
fn test_info_2() {
let info_text = r#"
Site: 哔哩哔哩 bilibili.com
Title: 【莓机会了】甜到虐哭的13集单集MAD「我现在什么都不想干,更不想看14集」
Type: video
Streams: # All available quality
[32-12] -------------------
Quality: 清晰 480P hev1.1.6.L120.90
Size: 16.40 MiB (17195126 Bytes)
# download with: lux -f 32-12 ...
[32-7] -------------------
Quality: 清晰 480P avc1.64001F
Size: 11.30 MiB (11845066 Bytes)
# download with: lux -f 32-7 ...
[16-12] -------------------
Quality: 流畅 360P hev1.1.6.L120.90
Size: 9.37 MiB (9830366 Bytes)
# download with: lux -f 16-12 ...
[16-7] -------------------
Quality: 流畅 360P avc1.64001E
Size: 7.99 MiB (8379190 Bytes)
# download with: lux -f 16-7 ...
"#;
assert_eq!(
info.parse_next(&mut info_text.as_bytes()),
Ok(Info {
iheader: Header {
hsite: "哔哩哔哩 bilibili.com".to_owned(),
htitle: "【莓机会了】甜到虐哭的13集单集MAD「我现在什么都不想干,更不想看14集」"
.to_owned(),
htype: "video".to_owned()
},
istreams: vec![
Stream {
sid: "32-12".to_owned(),
squality: "清晰 480P hev1.1.6.L120.90".to_owned(),
ssize: 16.4,
ssize_bytes: 17195126.0
},
Stream {
sid: "32-7".to_owned(),
squality: "清晰 480P avc1.64001F".to_owned(),
ssize: 11.3,
ssize_bytes: 11845066.0
},
Stream {
sid: "16-12".to_owned(),
squality: "流畅 360P hev1.1.6.L120.90".to_owned(),
ssize: 9.37,
ssize_bytes: 9830366.0
},
Stream {
sid: "16-7".to_owned(),
squality: "流畅 360P avc1.64001E".to_owned(),
ssize: 7.99,
ssize_bytes: 8379190.0
}
]
})
)
}
}