頭の悪いパーサコンビネータ
普通に再帰下降で書いていたはずが、定型句が面倒になって、関数を作る関数に変えていったらこんなふうになってしまいました。
(* Fold step shared by numeral and based_numeral.  Receives the buffer built
   so far and a pair (optional separator, digit); appends an underscore only
   when the separator is exactly Some '_', then appends the digit, and hands
   the same buffer back. *)
let add_digit_with_underline buffer (underline, d) =
  (match underline with
   | Some '_' -> Buffer.add_char buffer '_'
   | Some _ | None -> ());
  Buffer.add_char buffer d;
  buffer

(* Check shared by character_literal and string_element.  Reports a warning
   through [warn] at position [p] when the character is below '\x20', and
   returns the character unchanged in every case. *)
let warn_unless_graphic warn p () c =
  if c < '\x20' then warn p "This is not a graphic character.";
  c

(* 2.3: identifier ::= identifier_start {identifier_start | identifier_extend}
   Yields the identifier text as a string. *)
let rec identifier () =
  p_do (
    identifier_start
    @=> !// Buffer.of_char
    @=> (identifier_start @|| identifier_extend) /*/ Buffer.add_char_r
    @=> !// Buffer.contents
    : string p_type
  )

(* 2.3: identifier_start ::= letter_uppercase | letter_lowercase
     | letter_titlecase | letter_modifier | letter_other | number_letter
   Approximated here by the range A..Z (via p_symbol_i_range, presumably
   case-insensitive -- confirm against its definition) plus every byte in
   '\x80'..'\xff'. *)
and identifier_start () =
  p_do (
    p_symbol_i_range 'A' 'Z'
    @|| p_symbol_range '\x80' '\xff'
  )

(* 2.3: identifier_extend ::= mark_non_spacing | mark_spacing_combining
     | number_decimal | punctuation_connector | other_format
   Approximated here by the digits 0..9, the underscore, and every byte in
   '\x80'..'\xff'. *)
and identifier_extend () =
  p_do (
    p_symbol_range '0' '9'
    @|| p_symbol '_'
    @|| p_symbol_range '\x80' '\xff'
  )

(* 2.4: numeric_literal ::= decimal_literal | based_literal
   based_literal is listed first; both alternatives begin with a numeral. *)
and numeric_literal () =
  p_do (
    based_literal
    @|| decimal_literal
  )

(* 2.4.1: decimal_literal ::= numeral [.numeral] [exponent] *)
and decimal_literal () =
  p_do (
    numeral
    @=> !// Buffer.of_string
    @=> (p_symbol '.' @&& numeral) /?/ Buffer.add_char_string_r
    @=> exponent /?/ Buffer.add_string_r
    @=> !// Buffer.contents
  )

(* 2.4.1: numeral ::= digit {[underline] digit} *)
and numeral () =
  p_do (
    digit
    @=> !// Buffer.of_char
    @=> (p_symbol '_' @?&& digit) /*/ add_digit_with_underline
    @=> !// Buffer.contents
  )

(* 2.4.1: exponent ::= E [+] numeral | E - numeral *)
and exponent () =
  p_do (
    p_symbol_i 'E'
    @=> !// Buffer.of_char
    @=> (p_symbol '+' @|| p_symbol '-') /?/ Buffer.add_char_r
    @=> numeral // Buffer.add_string_r
    @=> !// Buffer.contents
  )

(* 2.4.1: digit ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 *)
and digit () =
  p_do (
    p_symbol_range '0' '9'
  )

(* 2.4.2: based_literal ::= base # based_numeral [.based_numeral] # [exponent]
   The closing '#' carries its own error message. *)
and based_literal () =
  p_do (
    base
    @=> !// Buffer.of_string
    @=> p_symbol '#' // Buffer.add_char_r
    @=> based_numeral // Buffer.add_string_r
    @=> (p_symbol '.' @&& based_numeral) /?/ Buffer.add_char_string_r
    @=> p_symbol '#' // Buffer.add_char_r /%>/ "\'#\' required."
    @=> exponent /?/ Buffer.add_string_r
    @=> !// Buffer.contents
  )

(* 2.4.2: base ::= numeral *)
and base () =
  numeral ()

(* 2.4.2: based_numeral ::= extended_digit {[underline] extended_digit} *)
and based_numeral () =
  p_do (
    extended_digit
    @=> !// Buffer.of_char
    @=> (p_symbol '_' @?&& extended_digit) /*/ add_digit_with_underline
    @=> !// Buffer.contents
  )

(* 2.4.2: extended_digit ::= digit | A | B | C | D | E | F *)
and extended_digit () =
  p_do (
    digit
    @|| p_symbol_i_range 'A' 'F'
  )

(* 2.5: character_literal ::= 'graphic_character'
   Yields the enclosed character; a character below '\x20' only triggers a
   warning. *)
and character_literal () =
  p_do (
    p_symbol '\'' // p_ignore
    @=> p_any_char /// warn_unless_graphic
    @=> p_symbol '\'' // p_ignore /%>/ "The character literal should be closed."
    : char p_type
  )

(* 2.6: string_literal ::= "{string_element}"
   Yields the accumulated string elements as a string. *)
and string_literal () =
  p_do (
    p_symbol '\"' // p_ignore
    @=> !// Buffer.create_empty
    @=> string_element /*/ Buffer.add_char_r
    @=> p_symbol '\"' // p_ignore /%>/ "The string literal should be closed."
    @=> !// Buffer.contents
    : string p_type
  )

(* 2.6: string_element ::= "" | non_quotation_mark_graphic_character
   First alternative: two consecutive double quotes.  Second alternative: any
   character except the double quote ('\x22'), with a warning for characters
   below '\x20'. *)
and string_element () =
  p_do (
    ( p_symbol '\"' // p_ignore
      @=> p_symbol '\"' )
    @|| ( (p_symbol_range '\x00' '\x21' @|| p_symbol_range '\x23' '\xff')
          /// warn_unless_graphic )
  )
関数型言語の構文(部分適用が一見しただけではわからない、といった性質)の上で、「これを毎回書くのは面倒だから隠しパラメータにしてしまおう」とあれこれ工夫していくと、パーサコンビネータに行き着くのは必然だと思った。
続きを読む