[go: up one dir, main page]

comrak 0.2.3

A 100% CommonMark-compatible GitHub Flavored Markdown parser and formatter
Documentation
use regex::bytes::Regex;
use twoway::find_bytes;

/// Runs `re` against `line` and returns the length in bytes of the first
/// match, or `None` when the pattern does not match.
fn search(re: &Regex, line: &[u8]) -> Option<usize> {
    if let Some(found) = re.find(line) {
        let (from, to) = (found.start(), found.end());
        Some(to - from)
    } else {
        None
    }
}

/// Runs `re` against `line` and returns the length in bytes of capture group
/// `ix`.
///
/// Yields `None` when the pattern does not match at all, or when it matches
/// but group `ix` did not take part in the match.
fn captures(re: &Regex, line: &[u8], ix: usize) -> Option<usize> {
    re.captures(line)
        .and_then(|groups| groups.get(ix))
        .map(|group| group.end() - group.start())
}

fn is_match(re: &Regex, line: &[u8]) -> bool {
    re.is_match(line)
}

pub fn atx_heading_start(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"\A(?:#{1,6}([ \t]+|[\r\n]))").unwrap();
    }
    search(&RE, line)
}

pub fn html_block_end_1(line: &[u8]) -> bool {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"\A(?:.*</(script|pre|style)>)").unwrap();
    }
    RE.is_match(line)
}

/// Returns `true` when `line` contains the byte sequence `-->`.
pub fn html_block_end_2(line: &[u8]) -> bool {
    let terminator: &[u8] = b"-->";
    let position = find_bytes(line, terminator);
    position.is_some()
}

/// Returns `true` when `line` contains the byte sequence `?>`.
pub fn html_block_end_3(line: &[u8]) -> bool {
    let terminator: &[u8] = b"?>";
    let position = find_bytes(line, terminator);
    position.is_some()
}

/// Returns `true` when `line` contains a `>` byte anywhere.
pub fn html_block_end_4(line: &[u8]) -> bool {
    line.iter().any(|&byte| byte == b'>')
}

/// Returns `true` when `line` contains the byte sequence `]]>`.
pub fn html_block_end_5(line: &[u8]) -> bool {
    let terminator: &[u8] = b"]]>";
    let position = find_bytes(line, terminator);
    position.is_some()
}

pub fn open_code_fence(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"\A(?:(```+|~~~+)[^`\r\n\x00]*[\r\n])").unwrap();
    }
    captures(&RE, line, 1)
}

pub fn close_code_fence(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"\A(?:(```+|~~~+)[ \t]*[\r\n])").unwrap();
    }
    captures(&RE, line, 1)
}

lazy_static! {
    // Tag names that open an HTML block under start condition 6 (see
    // `html_block_start`). Each name appears exactly once; a duplicate would
    // only add a redundant branch to the alternation built below.
    static ref BLOCK_TAG_NAMES: Vec<&'static str> = vec![
      "address", "article", "aside", "base", "basefont", "blockquote", "body", "caption", "center",
      "col", "colgroup", "dd", "details", "dialog", "dir", "div", "dl", "dt", "fieldset",
      "figcaption", "figure", "footer", "form", "frame", "frameset", "h1", "h2", "h3", "h4", "h5",
      "h6", "head", "header", "hr", "html", "iframe", "legend", "li", "link", "main", "menu",
      "menuitem", "meta", "nav", "noframes", "ol", "optgroup", "option", "p", "param", "section",
      "source", "summary", "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr",
      "track", "ul",
    ];

    // The names joined with `|`, ready to be spliced into a regex group.
    static ref BLOCK_TAG_NAMES_PIPED: String = BLOCK_TAG_NAMES.join("|");
}

/// Classifies the start of `line` against HTML block start conditions 1-6.
///
/// Returns `Some(n)` for the first condition that applies, checked in order:
///
/// 1. `<script`, `<pre` or `<style` followed by whitespace or `>`
/// 2. `<!--`
/// 3. `<?`
/// 4. `<!` followed by an uppercase ASCII letter
/// 5. `<![CDATA[`
/// 6. `<` or `</`, a block-level tag name, then whitespace, `>` or `/>`
///
/// Returns `None` when `line` does not begin with `<` or no condition applies.
/// Tag names in conditions 1 and 6 are matched ASCII case-insensitively, as
/// CommonMark specifies, so `<DIV>` and `<Script>` are recognised too.
pub fn html_block_start(line: &[u8]) -> Option<usize> {
    lazy_static! {
        // `(?i-u:...)` confines case folding to the tag names and keeps it
        // ASCII-only.
        static ref RE1: Regex = Regex::new(
            r"\A(?:<(?i-u:script|pre|style)([ \t\v\f\r\n]|>))").unwrap();
        // Declarations stay case-sensitive: only an uppercase letter counts.
        static ref RE4: Regex = Regex::new(r"\A(?:<![A-Z])").unwrap();
        static ref RE6: Regex = Regex::new(
            &format!(r"\A(?:</?(?i-u:{})([ \t\v\f\r\n]|/?>))", *BLOCK_TAG_NAMES_PIPED)).unwrap();
    }

    // Cheap rejection before any regex runs: every condition needs a `<`.
    if !line.starts_with(b"<") {
        return None;
    }

    if is_match(&RE1, line) {
        Some(1)
    } else if line.starts_with(b"<!--") {
        Some(2)
    } else if line.starts_with(b"<?") {
        Some(3)
    } else if is_match(&RE4, line) {
        Some(4)
    } else if line.starts_with(b"<![CDATA[") {
        Some(5)
    } else if is_match(&RE6, line) {
        Some(6)
    } else {
        None
    }
}

// Regex fragments for inline HTML, composed bottom-up into `HTML_TAG`.
// None of the fragments includes the leading `<`; users of these fragments
// either prepend it themselves (see `html_block_start_7`) or match without it.
lazy_static! {
    // A single whitespace byte: space, tab, vertical tab, form feed, CR or LF.
    static ref SPACE_CHAR: &'static str = r"(?:[ \t\v\f\r\n])";
    // An ASCII letter followed by letters, digits or hyphens.
    static ref TAG_NAME: &'static str = r"(?:[A-Za-z][A-Za-z0-9-]*)";
    // `/name`, optional whitespace, `>`.
    static ref CLOSE_TAG: String = format!(r"(?:/{}{}*>)", *TAG_NAME, *SPACE_CHAR);
    // Letter, `_` or `:` followed by letters, digits, `:`, `.`, `_` or `-`.
    static ref ATTRIBUTE_NAME: &'static str = r"(?:[a-zA-Z_:][a-zA-Z0-9:._-]*)";
    // Unquoted, single-quoted or double-quoted attribute value.
    static ref ATTRIBUTE_VALUE: &'static str =
        r#"(?:[^"'=<>`\x00]+|['][^'\x00]*[']|["][^"\x00]*["])"#;
    // `=` with optional whitespace on both sides, then a value.
    static ref ATTRIBUTE_VALUE_SPEC: String = format!(
        r"(?:{}*={}*{})", *SPACE_CHAR, *SPACE_CHAR, *ATTRIBUTE_VALUE);
    // Mandatory leading whitespace, a name, and an optional `= value` part.
    static ref ATTRIBUTE: String = format!(
        r"(?:{}+{}{}?)", *SPACE_CHAR, *ATTRIBUTE_NAME, *ATTRIBUTE_VALUE_SPEC);
    // Tag name, any attributes, optional whitespace, optional `/`, then `>`.
    static ref OPEN_TAG: String = format!(r"(?:{}{}*{}*/?>)", *TAG_NAME, *ATTRIBUTE, *SPACE_CHAR);
    // Either the empty comment `!---->` or `!--` ... `-->`; the body may not
    // start with `>` or `->` and may not contain `--`.
    static ref HTML_COMMENT: &'static str = r"(?:!---->|!---?[^\x00>-](-?[^\x00-])*-->)";
    // `?` ... `?>`. Not wrapped in a group, so only safe to embed where an
    // alternation branch is expected (as in `HTML_TAG`).
    static ref PROCESSING_INSTRUCTION: &'static str = r"\?([^?>\x00]+|\?[^>\x00]|>)*\?>";
    // `!`, uppercase letters, whitespace, anything up to `>`. Also ungrouped.
    static ref DECLARATION: String = format!(r"![A-Z]+{}+[^>\x00]*>", *SPACE_CHAR);
    // `![CDATA[` ... `]]>`. Also ungrouped.
    static ref CDATA: &'static str = r"!\[CDATA\[([^\]\x00]+|\][^\]\x00]|\]\][^>\x00])*\]\]>";
    // Any of the six constructs above, tried in the order listed.
    static ref HTML_TAG: String = format!(
        r"(?:{}|{}|{}|{}|{}|{})", *OPEN_TAG, *CLOSE_TAG, *HTML_COMMENT,
        *PROCESSING_INSTRUCTION, *DECLARATION, *CDATA);
}

pub fn html_block_start_7(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(
            &format!(r"\A(?:<({}|{})[\t\n\f ]*[\r\n])", *OPEN_TAG, *CLOSE_TAG)).unwrap();
    }

    if is_match(&RE, line) { Some(7) } else { None }
}

/// The character a setext heading underline is made of, as reported by
/// `setext_heading_line`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SetextChar {
    /// The underline consists of `=` characters.
    Equals,
    /// The underline consists of `-` characters.
    Hyphen,
}

pub fn setext_heading_line(line: &[u8]) -> Option<SetextChar> {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"\A(?:(=+|-+)[ \t]*[\r\n])").unwrap();
    }

    if is_match(&RE, line) {
        if line[0] == b'=' {
            Some(SetextChar::Equals)
        } else {
            Some(SetextChar::Hyphen)
        }
    } else {
        None
    }
}

pub fn thematic_break(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(
            r"\A(?:((\*[ \t]*){3,}|(_[ \t]*){3,}|(-[ \t]*){3,})[ \t]*[\r\n])").unwrap();
    }
    search(&RE, line)
}

lazy_static! {
    // URI scheme fragment: an ASCII letter followed by 1 to 31 letters,
    // digits, `.`, `+` or `-` (2 to 32 bytes in total). Shared by `scheme`
    // and `autolink_uri`.
    static ref SCHEME: &'static str = r"[A-Za-z][A-Za-z0-9.+-]{1,31}";
}

pub fn scheme(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(
            &format!(r"\A(?:{}:)", *SCHEME)).unwrap();
    }

    search(&RE, line)
}

pub fn autolink_uri(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(
            &format!(r"\A(?:{}:[^\x00-\x20<>]*>)", *SCHEME)).unwrap();
    }

    search(&RE, line)
}

pub fn autolink_email(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(
            concat!(
            r"\A(?:",
            "[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+",
            r"@",
            r"[a-zA-Z0-9]",
            r"([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?",
            r"(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*",
            r">",
            r")")).unwrap();
    }

    search(&RE, line)
}

pub fn html_tag(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(&format!(r"\A(?:{})", *HTML_TAG)).unwrap();
    }

    search(&RE, line)
}

pub fn spacechars(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(r"\A(?:[ \t\v\f\r\n]+)").unwrap();
    }

    search(&RE, line)
}

/// Matches a link title at the start of `line`: text delimited by `"…"`,
/// `'…'` or `(…)`, in which a backslash-escaped punctuation character
/// (including the closing delimiter) does not end the title.
///
/// Returns the length in bytes of the match, delimiters included, or `None`.
pub fn link_title(line: &[u8]) -> Option<usize> {
    lazy_static! {
        // Every alternative consumes the body one unit at a time — either an
        // escape or a single ordinary byte — so the escape branch is always
        // tried before a backslash can be taken as ordinary text. A greedy
        // `[^)\x00]*` inside the repetition would swallow the backslash of
        // `\)` and end the title at the escaped parenthesis.
        static ref RE: Regex = Regex::new(
            &format!(r#"\A(?:"({}|[^"\x00])*"|'({}|[^'\x00])*'|\(({}|[^)\x00])*\))"#,
            *ESCAPED_CHAR, *ESCAPED_CHAR, *ESCAPED_CHAR)).unwrap();
    }

    search(&RE, line)
}

lazy_static! {
    // A backslash followed by one ASCII punctuation character.
    static ref ESCAPED_CHAR: &'static str = r##"(?:\\[!"#$%&'()*+,./:;<=>?@\[\\\]^_`{|}~-])"##;
    // Horizontal whitespace inside a table row: space, tab, vertical tab or
    // form feed (no CR/LF).
    static ref TABLE_SPACECHAR: &'static str = r"(?:[ \t\v\f])";
    // A line ending: LF, optionally preceded by CR.
    static ref TABLE_NEWLINE: &'static str = r"(?:\r?\n)";
    // One column of a delimiter row: dashes with an optional `:` on either
    // side, surrounded by optional table whitespace.
    static ref TABLE_MARKER: String = format!(r"(?:{}*:?-+:?{}*)",
    *TABLE_SPACECHAR, *TABLE_SPACECHAR);
    // Cell contents: any run (possibly empty) of escapes or bytes other than
    // `|`, CR and LF.
    static ref TABLE_CELL: String = format!(r"(?:({}|[^|\r\n])*)", *ESCAPED_CHAR);
}

pub fn table_start(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(
            &format!(r"\A\|?{}(\|{})*\|?{}*{}",
            *TABLE_MARKER, *TABLE_MARKER, *TABLE_SPACECHAR, *TABLE_NEWLINE)).unwrap();
    }

    search(&RE, line)
}

pub fn table_cell(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(&format!(r"\A{}", *TABLE_CELL)).unwrap();
    }

    search(&RE, line)
}

pub fn table_cell_end(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(
            &format!(r"\A\|{}*{}?", *TABLE_SPACECHAR, *TABLE_NEWLINE)).unwrap();
    }

    search(&RE, line)
}

pub fn table_row_end(line: &[u8]) -> Option<usize> {
    lazy_static! {
        static ref RE: Regex = Regex::new(
            &format!(r"\A{}*{}", *TABLE_SPACECHAR, *TABLE_NEWLINE)).unwrap();
    }

    search(&RE, line)
}