LPeg

From Wiki
Revision as of 13:43, 24 June 2008 by Wolfgang Schuster (talk | contribs) (Example for the use of LPeg.)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Writing a parser with LPeg

To parse input files for use with TeX, you can either write Lua functions that convert the input and pass it to TeX, or use LPeg to parse the files in a simpler manner.

First version of an SGF parser

The code below reads input in SGF syntax and passes the content to TeX macros.

\startluacode

do

    -- Create the shared "thirddata" namespace and its "sgf" sub-table on
    -- first use; tables that already exist are left untouched.
    if not thirddata then
        thirddata = { }
    end
    if not thirddata.sgf then
        thirddata.sgf = { }
    end

    -- Emit one TeX call of the form \csname sgf!<name>\endcsname{<x>}
    -- through tex.sprint, passing tex.texcatcodes as its first argument.
    local function command(name,x)
        local call = string.format("\\csname sgf!%s\\endcsname{%s}",name,x)
        tex.sprint(tex.texcatcodes,call)
    end

    -- Dispatch table: maps an SGF property name to the function that
    -- emits the matching TeX command.
    local nodes = {
        B = function(x) command("black" ,x) end,
        W = function(x) command("white" ,x) end,
    }

    -- Look up the handler registered for property `what` and call it with
    -- `data`; a name without a handler is reported with print().
    local function action(what,data)
        local handler = nodes[what]
        if not handler then
            print("unknown action: " .. what)
            return
        end
        handler(data)
    end

    -- Current branch depth. Declared local to this chunk so the counter
    -- does not leak into the global environment; thirddata.sgf.parsea
    -- resets it to zero before every run.
    local nesting = 0

    -- Called when a branch opens: go one level deeper and emit sgf!start
    -- with the new depth.
    local function start()
        nesting = nesting + 1
        command("start",nesting)
    end

    -- Called when a branch closes: emit sgf!stop with the current depth,
    -- then go back up one level.
    local function stop()
        command("stop",nesting)
        nesting = nesting - 1
    end

	-- Lexical building blocks used by the grammars that follow.
	-- space: one or more blanks, carriage returns or newlines.
	local space    = lpeg.S(' \r\n')^1
	local lower, upper = lpeg.R("az"), lpeg.R("AZ")
	-- letter: any ASCII letter; position: at least two of them.
	local letter   = upper + lower
	local position = letter^2
	local left     = lpeg.P("(")
	local right    = lpeg.P(")")
	-- none: any single character that is not a parenthesis.
	local none     = 1 - lpeg.S("()")

	-- One move: ";" then B or W then "[position]". The colour letter and
	-- the position are captured and handed to action().
	local node     = (lpeg.P(";") * lpeg.C(lpeg.S("BW")) * lpeg.P("[") * lpeg.C(position) * lpeg.P("]")) / action
	-- A parenthesised, possibly nested, branch. "/" and "*" share the same
	-- precedence in Lua, so the two function captures are parenthesised
	-- explicitly: start is attached to "(" and stop to ")".
	local branch   = lpeg.P { (left/start) * (lpeg.V(1) + node + space)^0 * (right/stop) }

	-- Top level: any sequence of branches, moves and white space.
	local parser   = (branch + node + space)^0

    -- Entry point of the first parser: reset the depth counter, then match
    -- `str` against the grammar; the captures produce the TeX output.
    function thirddata.sgf.parsea(str)
        nesting = 0
        lpeg.match(parser,str)
    end

    -- Send `str` back to TeX wrapped in a \parsesgfb{...} call, printed
    -- through tex.sprint with tex.ctxcatcodes as its first argument.
    local function nest(str)
        local call = string.format("\\parsesgfb{%s}",str)
        tex.sprint(tex.ctxcatcodes,call)
    end

	-- Second grammar. The `node` pattern declared earlier in this chunk is
	-- reused here rather than being declared a second time with identical
	-- content. Inside a branch, every run of non-parenthesis characters
	-- is handed to nest().
	local branch   = lpeg.P { left * (lpeg.V(1) + (none^1/nest) + space)^0 * right }

	local parser   = (branch + node + space)^0

    -- Entry point of the second parser. Its grammar never calls start()
    -- or stop(), so there is no depth counter to reset here; the input is
    -- simply matched and the captures produce the TeX output.
    function thirddata.sgf.parseb(str)
        parser:match(str)
    end

end

\stopluacode

To work with the output from the parser we need the following TeX macros.

% TeX-side entry points: each macro places its argument between double
% quotes and passes it as a Lua string to the matching Lua parser.
% NOTE(review): #1 is inserted as-is between the quotes, so input that
% contains a double quote would presumably end the Lua string early -- confirm.
\long\def\parsesgfa#1{\ctxlua{thirddata.sgf.parsea("#1")}}
\long\def\parsesgfb#1{\ctxlua{thirddata.sgf.parseb("#1")}}

% Branch opened: #1 (the depth sent from Lua) is not used; start a new
% paragraph and increase the left margin by 2em.
\starttexdefinition sgf!start #1
    \par \advance\leftskip+2em
\stoptexdefinition

% Branch closed: #1 is not used; start a new paragraph and decrease the
% left margin by 2em again.
\starttexdefinition sgf!stop #1
    \par \advance\leftskip-2em
\stoptexdefinition

% Typeset a node. The first-version Lua code shown above only emits
% black, white, start and stop, so this one is not triggered by it.
\starttexdefinition sgf!node #1
  Node: #1\par
\stoptexdefinition

% Typeset a white move; #1 is the position.
\starttexdefinition sgf!white #1
  White: #1\par
\stoptexdefinition

% Typeset a black move; #1 is the position.
\starttexdefinition sgf!black #1
  Black: #1\par
\stoptexdefinition

\protect

% Sample document: the same game record is run through both parsers,
% with vertical space between the two results.
\starttext

\parsesgfa{(;B[aa];W[bb](;B[cc](;B[dd];W[ee](;B[ff]);W[gg]));W[hh])}
\blank
\parsesgfb{(;B[aa];W[bb](;B[cc](;B[dd];W[ee](;B[ff]);W[gg]));W[hh])}

\stoptext

An extended version

The second version of the SGF parser is less restrictive about the arguments of the commands in the input, and it delimits the arguments in a slightly different way than the first version.

\startluacode

do

    -- Create the shared "thirddata" namespace and its "sgf" sub-table on
    -- first use; tables that already exist are left untouched.
    if not thirddata then
        thirddata = { }
    end
    if not thirddata.sgf then
        thirddata.sgf = { }
    end

    -- Emit one TeX call of the form \csname sgf!<name>\endcsname{<x>}
    -- through tex.sprint, passing tex.texcatcodes as its first argument.
    local function command(name,x)
        local call = string.format("\\csname sgf!%s\\endcsname{%s}",name,x)
        tex.sprint(tex.texcatcodes,call)
    end

    -- Dispatch table: maps an SGF property name to the function that
    -- emits the matching TeX command.
    local nodes = {
        B  = function(x) command("black"    ,x) end,
        W  = function(x) command("white"    ,x) end,
        AW = function(x) command("addwhite" ,x) end,
        C  = function(x) command("comment"  ,x) end,
    }

    -- Look up the handler for property `what` and call it once for every
    -- balanced "[...]" group found in `data`, with the brackets removed.
    -- A name without a handler is reported with print().
    local function action(what,data)
        local handler = nodes[what]
        if not handler then
            print("unknown action: " .. what)
            return
        end
        for group in string.gmatch(data, "%b[]") do
            -- drop the opening and the closing bracket
            handler(string.sub(group,2,-2))
        end
    end

	-- Receives the matched text of one node, drops its first character
	-- and passes the remainder to the sgf!node TeX macro.
	local function nodecontent(str)
		local content = string.sub(str,2)
		tex.sprint(tex.ctxcatcodes,string.format("\\csname sgf!node\\endcsname{%s}",content))
	end

	-- White space: one or more blanks, carriage returns or newlines.
	local space      = lpeg.S(' \r\n')^1
	-- A property identifier is one or more upper-case letters.
	local ucletter   = lpeg.R("AZ")
	local propindent = ucletter^1

	-- One bracketed value: "[" ... "]" with no bracket characters inside.
	-- Shared by the property and the node pattern.
	local propvalue  = lpeg.P("[") * (1 - lpeg.S"[]")^0 * lpeg.P("]")

	-- Identifier followed by one or more values; the identifier and the
	-- run of values are captured separately and handed to action().
	local property   = lpeg.C(propindent) * lpeg.C(propvalue^1) / action

	-- Pass the inside of a branch (first and last character removed, i.e.
	-- without the enclosing parentheses) to the \parsesgf TeX macro.
	local function nest(str)
		tex.sprint(tex.ctxcatcodes,string.format("\\parsesgf{%s}",string.sub(str,2,-2)))
	end

	-- A node is ";" followed by one or more properties; its complete text
	-- is handed to nodecontent().
	local node   = lpeg.P(";") * (propindent * propvalue^1)^1 / nodecontent
	-- A branch is a balanced pair of parentheses; the self-reference
	-- through lpeg.V(1) is what requires the grammar table here.
	local branch = lpeg.P{ "(" * ((1 - lpeg.S"()") + lpeg.V(1))^0 * ")" } / nest

	local parser = (branch + node + property + space)^0

	-- Entry point of the extended parser: match `str` against the grammar;
	-- the captures produce the TeX output.
	function thirddata.sgf.parse(str)
		lpeg.match(parser,str)
	end

end

\stopluacode

The TeX code for the second version is nearly the same as in the first draft. This was a goal for both versions, because it is more important to keep the interface in TeX stable and to make the modifications in the low-level TeX or Lua code.

% Pass #1, placed between double quotes as a Lua string, to the Lua parser.
% NOTE(review): #1 is inserted as-is between the quotes, so a comment text
% containing a double quote would presumably end the Lua string early -- confirm.
\def\sgfflush#1{\ctxlua{thirddata.sgf.parse("#1")}}

% Parse one branch: indent by 2em, parse the content, then undo the indent.
\starttexdefinition parsesgf #1
  \par \advance\leftskip+2em
  \sgfflush{#1}
  \par \advance\leftskip-2em
\stoptexdefinition

% One node: start a new paragraph and parse the properties it contains.
\starttexdefinition sgf!node #1
  \par \sgfflush{#1}
\stoptexdefinition

% Typeset one value of a W property.
\starttexdefinition sgf!white #1
  White: #1\quad
\stoptexdefinition

% Typeset one value of a B property.
\starttexdefinition sgf!black #1
  Black: #1\quad
\stoptexdefinition

% Typeset one value of an AW property.
\starttexdefinition sgf!addwhite #1
  Add White: #1\quad
\stoptexdefinition

% Typeset one value of a C property.
\starttexdefinition sgf!comment #1
  Comment: #1\quad
\stoptexdefinition

\protect

% Sample document: a game record with a comment, an added white stone,
% a property with two values and nested variations.
\starttext

\parsesgf{(;C[First move in game 123]B[aa]AW[aa];W[bb](;B[cc](;B[dd];W[ee][ff](;B[ff]);W[gg]));W[hh])}

\stoptext

External Links