我一直认为手写文本解析器很难,但在体验 winnow 之后,发现用它编写文本解析器其实很值得上手。这里列举它的三个优点:

  • 可复用性强
  • 可解析流
  • 易于测试

下面的代码展示了如何使用 winnow 解析 lux 的输出信息和下载进度条。

use std::time::Duration;

use winnow::{
    ascii::{float, line_ending, not_line_ending, space0, space1},
    combinator::{alt, delimited, opt, repeat, seq, terminated},
    prelude::*,
    stream::Partial,
    token::{tag, take_until1},
};

/// Input type taken by the size, time and progress-line parsers in this file:
/// a byte slice wrapped in winnow's `Partial` stream type.
pub type InputStream<'i> = Partial<&'i [u8]>;

/// A quantity of data expressed in a specific unit.
///
/// Each variant stores the numeric value *in that unit*; nothing is
/// normalised on construction. Variants spelled with an `i` are binary
/// multiples (powers of 1024), the others are decimal multiples (powers of
/// 1000).
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Size {
    /// Bytes.
    B(f64),
    /// Kibibytes (1024 bytes).
    KiB(f64),
    /// Mebibytes (1024^2 bytes).
    MiB(f64),
    /// Gibibytes (1024^3 bytes).
    GiB(f64),
    /// Tebibytes (1024^4 bytes).
    TiB(f64),
    /// Kilobytes (10^3 bytes).
    KB(f64),
    /// Megabytes (10^6 bytes).
    MB(f64),
    /// Gigabytes (10^9 bytes).
    GB(f64),
    /// Terabytes (10^12 bytes).
    TB(f64),
}

impl Size {
    /// Returns the quantity converted to a number of bytes.
    #[rustfmt::skip]
    fn to_f64(&self) -> f64 {
        match *self {
            Size::B(size)   => size * 1f64,
            Size::KiB(size) => size * 1024f64,
            Size::MiB(size) => size * 1048576f64,
            Size::GiB(size) => size * 1073741824f64,
            Size::TiB(size) => size * 1099511627776f64,
            Size::KB(size)  => size * 1e3,
            Size::MB(size)  => size * 1e6,
            Size::GB(size)  => size * 1e9,
            Size::TB(size)  => size * 1e12,
        }
    }
}

/// Formats the value with two decimals, a space and the unit, e.g.
/// `16.40 MiB`.
///
/// This replaces the former inherent `to_string` method: `size.to_string()`
/// keeps working (and returns the same text) through the blanket `ToString`
/// impl, and `Size` additionally becomes usable with `format!`/`println!`.
impl std::fmt::Display for Size {
    #[rustfmt::skip]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (value, unit) = match *self {
            Size::B(size)   => (size, "B"),
            Size::KiB(size) => (size, "KiB"),
            Size::MiB(size) => (size, "MiB"),
            Size::GiB(size) => (size, "GiB"),
            Size::TiB(size) => (size, "TiB"),
            Size::KB(size)  => (size, "KB"),
            Size::MB(size)  => (size, "MB"),
            Size::GB(size)  => (size, "GB"),
            Size::TB(size)  => (size, "TB"),
        };
        write!(f, "{:.2} {}", value, unit)
    }
}

/// Parse unit string such as `MiB` to SizeUnit
pub fn size(input: &mut InputStream) -> PResult<Size> {
    alt((
        terminated(float, (space0, "B")).map(Size::B),
        terminated(float, (space0, "KiB")).map(Size::KiB),
        terminated(float, (space0, "MiB")).map(Size::MiB),
        terminated(float, (space0, "GiB")).map(Size::GiB),
        terminated(float, (space0, "TiB")).map(Size::TiB),
        terminated(float, (space0, "KB")).map(Size::KB),
        terminated(float, (space0, "MB")).map(Size::MB),
        terminated(float, (space0, "GB")).map(Size::GB),
        terminated(float, (space0, "TB")).map(Size::TB),
    ))
    .parse_next(input)
}

/// Parses either a size such as `16.40 MiB` (yielding `Some`) or a literal `?`
/// (yielding `None`).
pub fn optional_size(input: &mut InputStream) -> PResult<Option<Size>> {
    let known = size.map(Some);
    let unknown = tag("?").value(None);

    alt((known, unknown)).parse_next(input)
}

/// A duration broken down into optional components.
///
/// `None` means the component was absent. Values are floats, so fractional
/// components such as `1.5` hours are representable.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct Time {
    /// Number of years.
    years: Option<f64>,
    /// Number of months.
    months: Option<f64>,
    /// Number of weeks.
    weeks: Option<f64>,
    /// Number of days.
    days: Option<f64>,
    /// Number of hours.
    hours: Option<f64>,
    /// Number of minutes.
    minutes: Option<f64>,
    /// Number of seconds.
    seconds: Option<f64>,
}

impl Time {
    /// Returns `true` when at least one component is present.
    fn is_valid(&self) -> bool {
        [
            self.years,
            self.months,
            self.weeks,
            self.days,
            self.hours,
            self.minutes,
            self.seconds,
        ]
        .iter()
        .any(Option::is_some)
    }

    /// Converts the components into a single [`Duration`].
    ///
    /// A year counts as 365.25 days and a month as 30.44 days; absent
    /// components count as zero. The fractional part of the total is kept
    /// (e.g. `1.5` seconds becomes 1500 ms).
    ///
    /// A total that is negative or NaN yields [`Duration::ZERO`]; a total too
    /// large for a `Duration` yields [`Duration::MAX`]. This method never
    /// panics.
    pub fn to_duration(&self) -> Duration {
        let secs = self.years.unwrap_or(0.0) * 365.25 * 24.0 * 3600.0
            + self.months.unwrap_or(0.0) * 30.44 * 24.0 * 3600.0
            + self.weeks.unwrap_or(0.0) * 7.0 * 24.0 * 3600.0
            + self.days.unwrap_or(0.0) * 24.0 * 3600.0
            + self.hours.unwrap_or(0.0) * 3600.0
            + self.minutes.unwrap_or(0.0) * 60.0
            + self.seconds.unwrap_or(0.0);

        // `Duration` cannot represent negative or NaN values; clamp them to
        // zero, as the previous `as u64` cast did.
        if secs.is_nan() || secs <= 0.0 {
            return Duration::ZERO;
        }

        // Only overflow (including +infinity) can fail at this point.
        Duration::try_from_secs_f64(secs).unwrap_or(Duration::MAX)
    }
}

/// Parses a compact duration such as `2h3m1s` into a [`Time`].
///
/// Each component is a float immediately followed by its unit suffix, and
/// every component is optional. Components are attempted once each, in the
/// fixed order `y`, `M`, `w`, `d`, `h`, `m`, `s`; a component that appears out
/// of order is not captured (for `10m10h` only the minutes end up in the
/// result).
///
/// Fails when no component at all could be parsed.
#[rustfmt::skip]
pub fn time(input: &mut InputStream) -> PResult<Time> {
    seq! {Time {
        years:   opt(terminated(float, "y")),
        // Upper-case `M` is months; lower-case `m` (below) is minutes.
        months:  opt(terminated(float, "M")),
        weeks:   opt(terminated(float, "w")),
        days:    opt(terminated(float, "d")),
        hours:   opt(terminated(float, "h")),
        minutes: opt(terminated(float, "m")),
        seconds: opt(terminated(float, "s"))
    }}
    // Every field is optional, so the sequence alone would also accept input
    // containing no component; reject that case.
    .verify(|t| t.is_valid())
    .parse_next(input)
}

/// Parses either a duration such as `3h2s` (yielding `Some`) or a literal `?`
/// (yielding `None`).
pub fn optional_time(input: &mut InputStream) -> PResult<Option<Time>> {
    let known = time.map(Some);
    let unknown = tag("?").value(None);

    alt((known, unknown)).parse_next(input)
}

/// Parses a percentage such as `10.3%` and returns the number before the `%`.
pub fn persent(input: &mut InputStream) -> PResult<f64> {
    (float, "%").map(|(value, _)| value).parse_next(input)
}

/// One parsed progress-bar line, e.g.
/// `15.61 MiB / 16.40 MiB [============>--] 1.12 MiB p/s 95.19% 0s`.
#[derive(Debug, Clone, PartialEq)]
pub struct Progress {
    /// Size before the `/` (presumably the amount downloaded so far).
    pub pfinished: Size,
    /// Size after the `/` (presumably the total size).
    pub ptotal: Size,
    /// Size preceding `p/s`; `None` when the line shows `?` instead.
    pub pspeed: Option<Size>,
    /// Number preceding the `%` sign.
    pub ppersent: f64,
    /// Trailing duration (presumably the estimated time left); `None` when the
    /// line shows `?` instead.
    pub pleft: Option<Time>,
}

/// Parses a progress-bar line into a [`Progress`].
///
/// Expected layout, with the parts separated by spaces/tabs:
/// `<size> / <size> [<bar>] <size or ?> p/s <number>% <time or ?>`, for example
/// `15.61 MiB / 16.40 MiB [============>--] 1.12 MiB p/s 95.19% 0s`.
/// The text between the brackets must be non-empty and is discarded.
pub fn progress(input: &mut InputStream) -> PResult<Progress> {
    let mut line = seq! {Progress {
        pfinished: size,
        _: delimited(space1, "/", space1),
        ptotal: size,
        // The bar itself carries no information we keep.
        _: delimited(space1, delimited("[", take_until1("]"), "]"), space1),
        pspeed: terminated(optional_size, (space0, "p/s")),
        _: space1,
        ppersent: persent,
        _: space1,
        pleft: optional_time
    }};

    line.parse_next(input)
}

/// A progress-bar line whose parts are kept as the text that was matched
/// rather than as parsed values (only the percentage is numeric).
#[derive(Debug, PartialEq)]
pub struct PureProgress {
    /// Text of the size before the `/`, e.g. `15.61 MiB`.
    pub pfinished: String,
    /// Text of the size after the `/`, e.g. `16.40 MiB`.
    pub ptotal: String,
    /// Text of the speed including its `p/s` suffix, e.g. `1.12 MiB p/s`.
    pub pspeed: String,
    /// Number preceding the `%` sign.
    pub ppersent: f64,
    /// Text of the trailing duration, e.g. `0s`.
    pub pleft: String,
}

/// Decodes `bytes` as UTF-8 into an owned `String`, substituting U+FFFD for
/// any invalid sequences.
fn bytes_to_string(bytes: &[u8]) -> String {
    String::from_utf8_lossy(bytes).into_owned()
}

/// Parses a progress-bar line into a [`PureProgress`].
///
/// Accepts the same layout as `progress`, but returns the matched text of each
/// part instead of its parsed value; only the percentage is converted to a
/// number. The speed text includes its `p/s` suffix.
pub fn pure_progress(input: &mut InputStream) -> PResult<PureProgress> {
    let mut line = seq! {PureProgress {
        pfinished: size.recognize().map(bytes_to_string),
        _: delimited(space1, "/", space1),
        ptotal: size.recognize().map(bytes_to_string),
        // The bar itself carries no information we keep.
        _: delimited(space1, delimited("[", take_until1("]"), "]"), space1),
        pspeed: terminated(optional_size, (space0, "p/s")).recognize().map(bytes_to_string),
        _: space1,
        ppersent: persent,
        _: space1,
        pleft: optional_time.recognize().map(bytes_to_string),
    }};

    line.parse_next(input)
}

/// One entry of the `Streams:` section of lux's info output.
#[derive(Debug, Clone, PartialEq)]
pub struct Stream {
    /// Text between the square brackets on the entry's first line, e.g. `32-12`.
    pub sid: String,
    /// Remainder of the `Quality:` line.
    pub squality: String,
    /// Number printed before `MiB` on the `Size:` line.
    pub ssize: f64,
    /// Number printed before `Bytes` (inside the parentheses) on the `Size:` line.
    pub ssize_bytes: f64,
}

/// Parses one stream entry of lux's info output into a [`Stream`].
///
/// An entry consists of four indented lines followed by an empty line:
///
/// ```text
///      [32-12]  -------------------
///      Quality:         清晰 480P hev1.1.6.L120.90
///      Size:            16.40 MiB (17195126 Bytes)
///      # download with: lux -f 32-12 ...
///
/// ```
///
/// The indentation and label padding must match exactly.
fn stream(input: &mut &[u8]) -> PResult<Stream> {
    let mut entry = seq! {Stream {
        // `     [<id>]  -------------------`
        sid: terminated(
            delimited("     [", take_until1("]"), "]  -------------------"),
            line_ending
        ).map(bytes_to_string),
        squality: delimited("     Quality:         ", not_line_ending, line_ending).map(bytes_to_string),
        // `     Size:            <n> MiB (<n> Bytes)`
        ssize: delimited("     Size:            ", float, " MiB "),
        ssize_bytes: terminated(delimited("(", float, " Bytes)"), line_ending),
        // The download hint repeats the id; skip it together with the blank
        // line that closes the entry.
        _: terminated(
            delimited("     # download with: lux -f ", take_until1(" "), " ..."),
            (line_ending, line_ending)
        )
    }};

    entry.parse_next(input)
}

/// The leading `Site` / `Title` / `Type` lines of lux's info output.
#[derive(Debug, Clone, PartialEq)]
pub struct Header {
    /// Remainder of the `Site:` line.
    pub hsite: String,
    /// Remainder of the `Title:` line.
    pub htitle: String,
    /// Remainder of the `Type:` line, e.g. `video` or `image/jpeg`.
    pub htype: String,
}

/// Parse info header from lux stdout
fn header(input: &mut &[u8]) -> PResult<Header> {
    seq! {Header {
        _: line_ending,
        hsite: delimited(" Site:      ", not_line_ending, line_ending).map(bytes_to_string),
        htitle: delimited(" Title:     ", not_line_ending, line_ending).map(bytes_to_string),
        htype: delimited(" Type:      ", not_line_ending, line_ending).map(bytes_to_string),
    }}
    .parse_next(input)
}

/// The complete info output of lux: a header plus the list of streams.
#[derive(Debug, Clone, PartialEq)]
pub struct Info {
    /// The `Site` / `Title` / `Type` header.
    pub iheader: Header,
    /// The entries listed under `Streams:`, in the order they appear; the
    /// parser requires at least one.
    pub istreams: Vec<Stream>,
}

/// Parses the complete info output of lux into an [`Info`].
///
/// The input must consist of the header, the line
/// ` Streams:   # All available quality`, and then one or more stream entries.
pub fn info(input: &mut &[u8]) -> PResult<Info> {
    let mut document = seq! {Info {
        iheader: header,
        _: (" Streams:   # All available quality", line_ending),
        istreams: repeat(1.., stream),
    }};

    document.parse_next(input)
}

// Unit tests for the parsers above. The size/time/progress parsers are fed
// `Partial` input; the header/stream/info parsers take plain byte slices.
#[cfg(test)]
mod tests {
    use super::*;

    // `size` accepts a number and unit separated by any amount of spaces
    // (negative numbers included) and rejects an unknown unit.
    #[test]
    fn test_size() {
        assert_eq!(
            size.parse_next(&mut Partial::new(b"10 MiB")),
            Ok(Size::MiB(10.0))
        );

        assert!(size
            .parse_next(&mut Partial::new(b"-10.000    MiB"))
            .is_ok());
        assert!(size.parse_next(&mut Partial::new(b"10 MiiB")).is_err());
    }

    // Conversion to bytes and to display text.
    #[test]
    fn test_size_to() {
        let size = Size::MiB(1024.0);
        assert_eq!(size.to_f64(), 1024.0 * 1024.0 * 1024.0);
        assert_eq!(size.to_string(), "1024.00 MiB");
    }

    // A valid size yields `Some`; an unknown unit is still an error.
    #[test]
    fn test_optional_size() {
        assert_eq!(
            optional_size.parse_next(&mut Partial::new(b"10 MiB")),
            Ok(Some(Size::MiB(10.0)))
        );

        assert!(optional_size
            .parse_next(&mut Partial::new(b"10 MiiB"))
            .is_err());
    }

    #[test]
    fn test_persent() {
        assert_eq!(persent.parse_next(&mut Partial::new(b"20.3%")), Ok(20.3));
    }

    #[test]
    fn test_time() {
        // Components may be skipped as long as the remaining ones are in order.
        assert_eq!(
            time.parse_next(&mut Partial::new(b"10h20s")),
            Ok(Time {
                hours: Some(10.0),
                seconds: Some(20.0),
                ..Default::default()
            })
        );

        // Hours come before minutes in the parser, so the out-of-order `10h`
        // is not captured.
        assert_eq!(
            time.parse_next(&mut Partial::new(b"10m10h")),
            Ok(Time {
                minutes: Some(10.0),
                ..Default::default()
            })
        );
    }

    #[test]
    fn test_time_to() {
        let time = Time {
            hours: Some(100.0),
            ..Default::default()
        };
        assert_eq!(time.to_duration(), Duration::from_secs_f64(100.0 * 3600.0));
    }

    #[test]
    fn test_optional_time() {
        assert_eq!(
            optional_time.parse_next(&mut Partial::new(b"10s")),
            Ok(Some(Time {
                seconds: Some(10.0),
                ..Default::default()
            }))
        );

        // `?` stands for an unknown duration.
        assert_eq!(optional_time.parse_next(&mut Partial::new(b"?")), Ok(None));

        // NOTE(review): this exercises `time`, not `optional_time` — possibly a
        // copy-paste slip given the test's name; confirm which was intended.
        assert!(time.parse_next(&mut Partial::new(b"")).is_err());
    }

    // Full progress line with a known speed and remaining time.
    #[test]
    fn test_progress() {
        assert_eq!(
            progress.parse_next(&mut Partial::new(
                b"15.61 MiB / 16.40 MiB [============>--] 1.12 MiB p/s 95.19% 0s"
            )),
            Ok(Progress {
                pfinished: Size::MiB(15.61),
                ptotal: Size::MiB(16.40),
                pspeed: Some(Size::MiB(1.12)),
                ppersent: 95.19,
                pleft: Some(Time {
                    seconds: Some(0.0),
                    ..Default::default()
                })
            })
        )
    }

    // Progress line where speed and remaining time are shown as `?`.
    #[test]
    fn test_progress_2() {
        assert_eq!(
            progress.parse_next(&mut Partial::new(
                b"0 MiB / 16.40 MiB [============>--] ? p/s 0.0% ?"
            )),
            Ok(Progress {
                pfinished: Size::MiB(0.0),
                ptotal: Size::MiB(16.40),
                pspeed: None,
                ppersent: 0.0,
                pleft: None
            })
        )
    }

    // `pure_progress` keeps the matched text; the speed includes its `p/s` suffix.
    #[test]
    fn test_pure_progress() {
        assert_eq!(
            pure_progress.parse_next(&mut Partial::new(
                b"15.61 MiB / 16.40 MiB [============>--] 1.12 MiB p/s 95.19% 0s"
            )),
            Ok(PureProgress {
                pfinished: "15.61 MiB".to_string(),
                ptotal: "16.40 MiB".to_string(),
                pspeed: "1.12 MiB p/s".to_string(),
                ppersent: 95.19,
                pleft: "0s".to_string()
            })
        )
    }

    // The header fixture starts with an empty line, as the parser requires.
    #[test]
    fn test_header() {
        let header_text = r#"
 Site:      Universal
 Title:     th
 Type:      image/jpeg
"#;

        assert_eq!(
            header.parse_next(&mut header_text.as_bytes()),
            Ok(Header {
                hsite: "Universal".to_owned(),
                htitle: "th".to_owned(),
                htype: "image/jpeg".to_owned(),
            })
        )
    }

    // A single stream entry, including the blank line that terminates it.
    #[test]
    fn test_stream() {
        let stream_text = r#"     [default]  -------------------
     Quality:         default
     Size:            0.01 MiB (8106 Bytes)
     # download with: lux -f default ...

"#;
        assert_eq!(
            stream.parse_next(&mut stream_text.as_bytes()),
            Ok(Stream {
                sid: "default".to_owned(),
                squality: "default".to_owned(),
                ssize: 0.01,
                ssize_bytes: 8106.0
            })
        )
    }

    // Complete info output with exactly one stream.
    #[test]
    fn test_info() {
        let info_text = r#"
 Site:      Universal
 Title:     th
 Type:      image/jpeg
 Streams:   # All available quality
     [default]  -------------------
     Quality:         default
     Size:            0.01 MiB (8106 Bytes)
     # download with: lux -f default ...

"#;
        assert_eq!(
            info.parse_next(&mut info_text.as_bytes()),
            Ok(Info {
                iheader: Header {
                    hsite: "Universal".to_owned(),
                    htitle: "th".to_owned(),
                    htype: "image/jpeg".to_owned(),
                },
                istreams: vec![Stream {
                    sid: "default".to_owned(),
                    squality: "default".to_owned(),
                    ssize: 0.01,
                    ssize_bytes: 8106.0
                }]
            })
        )
    }

    // Complete info output with several streams and non-ASCII (UTF-8) text.
    #[test]
    fn test_info_2() {
        let info_text = r#"
 Site:      哔哩哔哩 bilibili.com
 Title:     【莓机会了】甜到虐哭的13集单集MAD「我现在什么都不想干,更不想看14集」
 Type:      video
 Streams:   # All available quality
     [32-12]  -------------------
     Quality:         清晰 480P hev1.1.6.L120.90
     Size:            16.40 MiB (17195126 Bytes)
     # download with: lux -f 32-12 ...

     [32-7]  -------------------
     Quality:         清晰 480P avc1.64001F
     Size:            11.30 MiB (11845066 Bytes)
     # download with: lux -f 32-7 ...

     [16-12]  -------------------
     Quality:         流畅 360P hev1.1.6.L120.90
     Size:            9.37 MiB (9830366 Bytes)
     # download with: lux -f 16-12 ...

     [16-7]  -------------------
     Quality:         流畅 360P avc1.64001E
     Size:            7.99 MiB (8379190 Bytes)
     # download with: lux -f 16-7 ...

"#;
        assert_eq!(
            info.parse_next(&mut info_text.as_bytes()),
            Ok(Info {
                iheader: Header {
                    hsite: "哔哩哔哩 bilibili.com".to_owned(),
                    htitle: "【莓机会了】甜到虐哭的13集单集MAD「我现在什么都不想干,更不想看14集」"
                        .to_owned(),
                    htype: "video".to_owned()
                },
                istreams: vec![
                    Stream {
                        sid: "32-12".to_owned(),
                        squality: "清晰 480P hev1.1.6.L120.90".to_owned(),
                        ssize: 16.4,
                        ssize_bytes: 17195126.0
                    },
                    Stream {
                        sid: "32-7".to_owned(),
                        squality: "清晰 480P avc1.64001F".to_owned(),
                        ssize: 11.3,
                        ssize_bytes: 11845066.0
                    },
                    Stream {
                        sid: "16-12".to_owned(),
                        squality: "流畅 360P hev1.1.6.L120.90".to_owned(),
                        ssize: 9.37,
                        ssize_bytes: 9830366.0
                    },
                    Stream {
                        sid: "16-7".to_owned(),
                        squality: "流畅 360P avc1.64001E".to_owned(),
                        ssize: 7.99,
                        ssize_bytes: 8379190.0
                    }
                ]
            })
        )
    }
}
最后修改:2024 年 03 月 02 日
如果觉得我的文章对你有用,请随意赞赏