頭の悪いパーサコンビネータ

普通に再帰下降で書いていたはずが、定型句が面倒になって、関数を作る関数に変えていったらこんなふうになってしまいました。

  (* 2.3:
  identifier ::= 
     identifier_start {identifier_start | identifier_extend} *)
  let rec identifier () = p_do (
    identifier_start @=>
    !// Buffer.of_char @=>
    (identifier_start @|| identifier_extend) /*/ Buffer.add_char_r @=>
    !// Buffer.contents
    : string p_type
  )
  (* 2.3:
  identifier_start ::= 
       letter_uppercase
     | letter_lowercase
     | letter_titlecase
     | letter_modifier
     | letter_other
     | number_letter *)
  and identifier_start () = p_do (
    p_symbol_i_range 'A' 'Z' @|| p_symbol_range '\x80' '\xff'
  )
  (* 2.3:
  identifier_extend ::= 
       mark_non_spacing
     | mark_spacing_combining
     | number_decimal
     | punctuation_connector
     | other_format *)
  and identifier_extend () = p_do (
    p_symbol_range '0' '9' @|| p_symbol '_' @|| p_symbol_range '\x80' '\xff'
  )
  (* 2.4:
  numeric_literal ::= decimal_literal | based_literal *)
  and numeric_literal () = p_do (
    based_literal @|| decimal_literal
  )
  (* 2.4.1:
  decimal_literal ::= numeral [.numeral] [exponent] *)
  and decimal_literal () = p_do (
    numeral @=>
    !// Buffer.of_string @=>
    (p_symbol '.' @&& numeral) /?/ Buffer.add_char_string_r @=>
    exponent /?/ Buffer.add_string_r @=>
    !// Buffer.contents
  )
  (* 2.4.1:
  numeral ::= digit {[underline] digit} *)
  and numeral () = p_do (
    digit @=>
    !// Buffer.of_char @=>
    (p_symbol '_' @?&& digit) /*/ (fun buffer (s, d) ->
      if s = Some '_' then Buffer.add_char buffer '_';
      Buffer.add_char buffer d;
      buffer
    ) @=>
    !// Buffer.contents
  )
  (* 2.4.1:
  exponent ::= E [+] numeral | E - numeral *)
  and exponent () = p_do (
    p_symbol_i 'E' @=>
    !// Buffer.of_char @=>
    (p_symbol '+' @|| p_symbol '-') /?/ Buffer.add_char_r @=>
    numeral // Buffer.add_string_r @=>
    !// Buffer.contents
  )
  (* 2.4.1:
  digit ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 *)
  and digit () = p_do (
    p_symbol_range '0' '9'
  )
  (* 2.4.2:
  based_literal ::= 
     base # based_numeral [.based_numeral] # [exponent] *)
  and based_literal () = p_do (
    base @=>
    !// Buffer.of_string @=>
    (p_symbol '#') // Buffer.add_char_r @=> 
    based_numeral // Buffer.add_string_r @=> 
    (p_symbol '.' @&& based_numeral) /?/ Buffer.add_char_string_r @=>
    (p_symbol '#') // Buffer.add_char_r /%>/ "\'#\' required." @=>
    exponent /?/ Buffer.add_string_r @=>
    !// Buffer.contents
  )
  (* 2.4.2:
  base ::= numeral *)
  and base () = numeral ()
  (* 2.4.2:
  based_numeral ::= 
     extended_digit {[underline] extended_digit} *)
  and based_numeral () = p_do (
    extended_digit @=>
    !// Buffer.of_char @=>
    (p_symbol '_' @?&& extended_digit) /*/ (fun buffer (s, d) ->
      if s = Some '_' then Buffer.add_char buffer '_';
      Buffer.add_char buffer d;
      buffer
    ) @=>
    !// Buffer.contents
  )
  (* 2.4.2:
  extended_digit ::= digit | A | B | C | D | E | F *)
  and extended_digit () = p_do (
    digit @|| p_symbol_i_range 'A' 'F'
  )
  (* 2.5:
  character_literal ::= 'graphic_character' *)
  and character_literal () = p_do (
    p_symbol '\'' // p_ignore @=>
    p_any_char /// (fun warn p () c -> 
      if c < '\x20' then warn p "This is not a graphic character.";
      c
    ) @=>
    p_symbol '\'' // p_ignore /%>/ "The character literal should be closed." 
    : char p_type
  )
  (* 2.6:
  string_literal ::= "{string_element}" *)
  and string_literal () = p_do (
    p_symbol '\"' // p_ignore @=>
    !// Buffer.create_empty @=>
    string_element /*/ Buffer.add_char_r @=>
    p_symbol '\"' // p_ignore /%>/ "The string literal should be closed." @=>
    !// Buffer.contents
    : string p_type
  )
  (* 2.6:
  string_element ::= "" | non_quotation_mark_graphic_character *)
  and string_element () = p_do (
    (
      p_symbol '\"' // p_ignore @=> 
      p_symbol '\"'
    ) @|| (
      (p_symbol_range '\x00' '\x21' @|| p_symbol_range '\x23' '\xff') /// (fun warn p () c -> 
        if c < '\x20' then warn p "This is not a graphic character.";
        c
      )
    )
  )

関数型言語の構文(部分適用が一見わからないとかそんな)上で、これ毎回書くの面倒だから隠しパラメータにしてごにょごにょやってたらパーサコンビネータに行き着くのは必然と思った。
↓こーゆーのをかましてる。効率悪そうだ。

module Utilities = functor (Env: PARSING_ENV) -> struct

  type p_pos = Env.position_key;;

  type 'a p_type = unit -> p_pos -> Env.t -> ('a * p_pos) option;;

  let rec p_do (f: 'a p_type) (_: p_pos) (e: Env.t) = (
    f () (Env.position e) e
  );;

  (* parser *)

  let p_any_char () (p: p_pos) e = (
    match Env.peek e with
    | Some r -> 
      Env.junk e;
      Some (r, p)
    | None -> None
  );;

  let p_symbol c () (p: p_pos) e = (
    match Env.peek e with
    | Some r -> 
      if r = c then (
        Env.junk e;
        Some (r, p)
      ) else (
        None
      )
    | None -> None
  );;

  let p_symbol_i c () (p: p_pos) e = (
    match Env.peek e with
    | Some r -> 
      if Char.uppercase r = c then (
        Env.junk e;
        Some (r, p)
      ) else (
        None
      )
    | None -> None
  );;

  let p_symbol_range cf cl () (p: p_pos) e = (
    match Env.peek e with
    | Some r -> 
      if r >= cf && r <= cl then (
        Env.junk e;
        Some (r, p)
      ) else (
        None
      )
    | None -> None
  );;

  let p_symbol_i_range cf cl () (p: p_pos) e = (
    match Env.peek e with
    | Some r -> 
      let u = Char.uppercase r in
      if u >= cf && u <= cl then (
        Env.junk e;
        Some (r, p)
      ) else (
        None
      )
    | None -> None
  );;

  let p_position () (p: p_pos) e = (
    Some (Env.position e, p)
  );;

  let p_set_position v (_: p_pos) e = (
    Some (v, Env.position e)
  );;

  (* value combinator *)

  let p_ignore x _ = x;;

  (* postfix *)

  let ( !// ) f v (p: p_pos) (_: Env.t) = (
    Some (f v, p)
  );;

  let ( !/// ) (f: (p_pos -> string -> unit) -> p_pos -> 'a -> 'b) v (p: p_pos) e = (
    Some (f (Env.warning e) p v, p)
  );;

  (* infix left to right *)

  let ( // ) (s: 'a p_type) func v p (e: Env.t) = (
    begin match s () p e with
    | Some (x, p2) -> Some (func v x, p2)
    | None -> None
    end
  );;

  let ( /// ) (s: 'a p_type) (f: (p_pos -> string -> unit) -> p_pos -> 'b -> 'a -> 'c) v (p: p_pos) (e: Env.t) = (
    begin match s () p e with
    | Some (x, p2) -> Some (f (Env.warning e) p v x, p2)
    | None -> None
    end
  );;

  let (/?/) s f v (p: p_pos) (e: Env.t) = (
    match s () p e with
    | Some (x, p2) -> Some (f v x, p2)
    | None -> Some (v, p)
  );;

  let rec ( /*/ ) s f v (p: p_pos) (e: Env.t) = (
    match s () p e with
    | Some (x, p2) ->
      ( /*/ ) s f (f v x) p2 e
    | None -> Some (v, p)
  );;

  let ( /%</ ) s message v (p: p_pos) (e: Env.t) = (
    begin match s v p e with
    | Some (_, (_: p_pos)) as r -> r
    | None -> 
      Env.error e p message;
      Some (v, p)
    end
  );;

  let ( /%>/ ) s message v (p: p_pos) (e: Env.t) = (
    begin match s v p e with
    | Some (_, (_: p_pos)) as r -> r
    | None -> 
      Env.error e (Env.position e) message;
      Some (v, p)
    end
  );;

  (* infix right to left *)

  let ( @|| ) s1 s2 () (p: p_pos) (e: Env.t) = (
    let r1 = s1 () p e in
    begin match r1 with
    | Some (_, (_: p_pos)) -> r1
    | None -> s2 () p e
    end
  );;

  let ( @&& ) s1 s2 () (p: p_pos) e = (
    let i = Env.position e in
    begin match s1 () p e with
    | Some (v1, (p2: p_pos)) ->
      begin match s2 () p2 e with
      | Some (v2, (p3: p_pos)) -> Some ((v1, v2), p3)
      | None -> 
        Env.rollback e i;
        None
      end
    | None -> None
    end
  );;

  let ( @?&& ) s1 s2 () (p: p_pos) e = (
    let i = Env.position e in
    let (r1, p2) = (
      match s1 () p e with 
      | Some (x, pr) -> (Some x, pr)
      | None -> (None, p)
    ) in
    begin match s2 () p2 e with
    | Some (r2, (p3: p_pos)) -> Some ((r1, r2), p3)
    | None -> 
      Env.rollback e i;
      None
    end
  );;

  let ( @=> ) s1 s2 v (p: p_pos) e = (
    let i = Env.position e in
    begin match s1 v p e with
    | Some (r1, (p2: p_pos)) ->
      begin match s2 r1 p2 e with
      | Some (_, (_: p_pos)) as r2 -> r2
      | None -> 
        Env.rollback e i;
        None
      end
    | None -> None
    end
  );;

end;;