type token = Plus | Dash | Star | Slash | LParen | RParen | Int of string
 
(* string -> token list
"+ 1 2" -> [Plus; Int("1"); Int("2")]
*)
 
let lexer program =
	let mkcompile s = Re.compile (Re.Perl.re s) in
	let plus_re = mkcompile "^\\+" in
	let mult_re = mkcompile "^\\*" in
	let sub_re = mkcompile "^-" in
	let div_re = mkcompile "^\\/" in
	let lp_re = mkcompile "^\\(" in
	let rp_re = mkcompile "^\\)" in
	let num_re = mkcompile "^-?\\d+" in
	let ws_re = mkcompile " " in
	let rec lexer program =
		if Re.execp plus_re program then
			Plus::(lexer (String.sub program 1 ((String.len program) - 1)))
		else if Re.execp mult_re program then
			Star::(lexer (String.sub program 1 ((String.len program) - 1)))
		else if Re.execp num_re program then
			let num = Re.Group.get (Re.exec num_re program) 0 in
			let num_len = String.len num in
			Int(num)::(lexer (String.sub program num_len ((String.len program) - num_len)))
		else if Re.execp sub_re program then
			Dash::(lexer (String.sub program 1 ((String.len program) - 1)))
		else if Re.execp div_re program then
			Slash::(lexer (String.sub program 1 ((String.len program) - 1)))
		else if Re.execp lp_re program then
			LParen::(lexer (String.sub program 1 ((String.len program) - 1)))
		else if Re.execp rp_re program then
			RParen::(lexer (String.sub program 1 ((String.len program) - 1)))
		else if Re.exccp ws_re program then
			lexer (String.sub program 1 ((String.len program) - 1))
		else
			failwith "word not valid"
	in lexer program

Building tree

type ast = Value of String | Add of ast * ast | Minus of ast * ast | Mult of ast * ast | Div of ast * ast;;
(* type parse_tree = Value of string | Add of "+" * parse_tree * parse_tree | ... | Paren of "(" * parse_tree * ")" | ...;; *)
 
(* token list -> ast *)
(* We will make an LL(k) parser, cannot deal with ambiguity, they are recursive descent parsers
LL(k) says left associative, lookahead by k tokens *)
let parser tokens = 
	(* token list -> ast, token list *)
	let parse_e tokens = match tokens with
		(* E trees start with Plus *)
		|Plus::toks -> (* the first thing after is T *) let ttree, tremain = parse_t toks in
		(* then the first thing after the T is an E *)
			let etree, eremain = parse_e tremain in
			Add(ttree, etree), eremain
		(* Or, E trees start with Dash *)
		|Dash::toks -> let ttree, tremain = parse_t toks in
			let etree, eremain = parse_e tremain in
			Minus(ttree, etree), eremain
		(* if it's none of these things, it HAS to be a T-tree by definition of our E tree production rule *)
		|_ -> parse_t tokens (*ttree, tremain*)
	and
		parse_t tokens = let ntree, nremain = parse_n tokens in
		match nremain with
		|Star::toks -> let ttree, tremain = parse_t toks in
			Mult(ntree, ttree), tremain
		|Slash::toks -> let ttree, tremain = parse_t toks in
			Div(ntree, ttree), tremain
			(* if it's none of these things, it HAS to be a N-tree by definition of our N tree production rule *)
		|_ -> ntree, nremain
	and parse_n tokens = match tokens with
		|Int(x)::toks -> Value(x), toks
		|LParen::toks -> let etree, eremain = parse_e toks in
			match eremain with
			RParen -> etree, eremain
			|_ -> failwith "unbalanced parenthesis"
		|_ -> failwith "grammar is wrong"
	in
	let etree, eremain = parse_e tokens in
	match eremain with
	[] -> etree
	|_ -> failwith "leftover tokens"