1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
use regex_syntax::hir::ClassUnicodeRange;
use regex_syntax::hir::ClassBytesRange;
use utf8_ranges::Utf8Range;

use std::cmp::{Ord, Ordering};

/// An inclusive range of byte values, from `start` up to and including `end`.
///
/// A range whose `start` equals its `end` stands for a single byte.
/// The derived `PartialOrd` compares `start` first and `end` second
/// (field declaration order).
#[derive(Clone, Copy, PartialOrd, PartialEq, Eq, Hash)]
pub struct Range {
    /// First byte covered by the range.
    pub start: u8,
    /// Last byte covered by the range (inclusive).
    pub end: u8,
}

impl Range {
    pub fn as_byte(&self) -> Option<u8> {
        if self.is_byte() {
            Some(self.start)
        } else {
            None
        }
    }

    pub fn is_byte(&self) -> bool {
        self.start == self.end
    }
}

impl From<u8> for Range {
    /// Builds the single-byte range whose `start` and `end` are both `byte`.
    fn from(byte: u8) -> Range {
        let (start, end) = (byte, byte);

        Range { start, end }
    }
}

impl From<&u8> for Range {
    fn from(byte: &u8) -> Range {
        Range::from(*byte)
    }
}

impl Iterator for Range {
    type Item = u8;

    /// Yields every byte from `start` through `end`, inclusive.
    ///
    /// The range is consumed in place: `start` advances with each yielded
    /// byte, and a range whose `start` is greater than its `end` yields
    /// nothing.
    fn next(&mut self) -> Option<u8> {
        if self.start > self.end {
            return None;
        }

        let current = self.start;

        if current == self.end {
            // Final byte: flip to an inverted range instead of incrementing,
            // so that a range ending at 0xFF terminates rather than
            // overflowing `start`.
            self.start = 0xFF;
            self.end = 0x00;
        } else {
            self.start = current + 1;
        }

        Some(current)
    }
}

impl Ord for Range {
    /// Orders ranges by `start`, breaking ties on `end`.
    ///
    /// Both fields take part in the comparison so that `cmp` agrees with the
    /// `PartialEq` and `PartialOrd` implementations derived on `Range`: two
    /// ranges compare as `Ordering::Equal` only when they are equal, and
    /// `partial_cmp` always returns `Some(cmp)`.
    fn cmp(&self, other: &Self) -> Ordering {
        self.start
            .cmp(&other.start)
            .then_with(|| self.end.cmp(&other.end))
    }
}

impl From<Utf8Range> for Range {
    /// Copies the `start` and `end` bytes of a `Utf8Range` into a `Range`.
    fn from(r: Utf8Range) -> Range {
        let (start, end) = (r.start, r.end);

        Range { start, end }
    }
}

impl From<ClassUnicodeRange> for Range {
    /// Narrows a Unicode class range to a byte range.
    ///
    /// The conversion accepts a range whose `start` is below 128 and whose
    /// `end` is either below 128 or exactly `0x10FFFF`. In the latter case
    /// the `as u8` cast truncates `end` to `0xFF`.
    ///
    /// # Panics
    ///
    /// Panics when `start` is 128 or above, or when `end` is 128 or above
    /// without being `0x10FFFF`.
    fn from(r: ClassUnicodeRange) -> Range {
        const OPEN_END: u32 = 0x0010_FFFF;

        let (lo, hi) = (r.start() as u32, r.end() as u32);

        let lo_accepted = lo < 128;
        let hi_accepted = hi < 128 || hi == OPEN_END;

        if !(lo_accepted && hi_accepted) {
            panic!("Casting non-ascii ClassUnicodeRange to Range")
        }

        Range {
            start: lo as u8,
            // Plain truncating cast: 0x10FFFF becomes 0xFF here.
            end: hi as u8,
        }
    }
}

impl From<ClassBytesRange> for Range {
    /// Copies the bounds reported by `start()` and `end()` into a `Range`.
    fn from(r: ClassBytesRange) -> Range {
        let (start, end) = (r.start(), r.end());

        Range { start, end }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Drains `range` into a vector, capped at 1000 items so that an iterator
    /// which never terminates fails the assertion instead of hanging the test.
    fn collect(range: Range) -> Vec<u8> {
        range.take(1000).collect()
    }

    /// A single-byte range yields exactly that byte.
    #[test]
    fn range_iter_one() {
        let collected = collect(Range::from(b'!'));

        assert_eq!(b"!", &collected[..]);
    }

    /// A multi-byte range yields every byte, including `end`.
    #[test]
    fn range_iter_few() {
        let collected = collect(Range { start: b'a', end: b'd' });

        assert_eq!(b"abcd", &collected[..]);
    }

    /// A range ending at 0xFF stops after yielding 0xFF.
    #[test]
    fn range_iter_bounds() {
        // Built with a struct literal: the conversions defined for `Range`
        // in this file do not include one from `RangeInclusive<u8>`.
        let collected = collect(Range { start: 0xFA, end: 0xFF });

        assert_eq!(b"\xFA\xFB\xFC\xFD\xFE\xFF", &collected[..]);
    }
}