Difference between revisions of "LPeg"
Jump to navigation
Jump to search
(Example for the use of LPeg.) |
m (two external links from the mailing list added) |
||
(3 intermediate revisions by 2 users not shown) | |||
Line 21: | Line 21: | ||
local nodes = { } | local nodes = { } | ||
− | function nodes.B(x) command("black" ,x) end | + | function nodes.B(x) command("black",x) end |
− | function nodes.W(x) command("white" ,x) end | + | function nodes.W(x) command("white",x) end |
local function action(what,data) | local function action(what,data) | ||
Line 43: | Line 43: | ||
end | end | ||
− | + | local space = lpeg.S(' \r\n')^1 | |
− | + | local lower = lpeg.R("az") | |
− | + | local upper = lpeg.R("AZ") | |
− | + | local letter = lower + upper | |
− | + | local position = letter^2 | |
local left = lpeg.P("(") | local left = lpeg.P("(") | ||
local right = lpeg.P(")") | local right = lpeg.P(")") | ||
local none = 1- (left + right) | local none = 1- (left + right) | ||
− | + | local node = (lpeg.P(";") * lpeg.C(lpeg.S("BW")) * lpeg.P("[") * lpeg.C(position) * lpeg.P("]")) / action | |
local branch = lpeg.P { left/start * (lpeg.V(1) + node + space)^0 * right/stop } | local branch = lpeg.P { left/start * (lpeg.V(1) + node + space)^0 * right/stop } | ||
− | + | local parser = (branch + node + space)^0 | |
function thirddata.sgf.parsea(str) | function thirddata.sgf.parsea(str) | ||
Line 66: | Line 66: | ||
end | end | ||
− | + | local node = (lpeg.P(";") * lpeg.C(lpeg.S("BW")) * lpeg.P("[") * lpeg.C(position) * lpeg.P("]")) / action | |
local branch = lpeg.P { left * (lpeg.V(1) + (none^1/nest) + space)^0 * right } | local branch = lpeg.P { left * (lpeg.V(1) + (none^1/nest) + space)^0 * right } | ||
− | + | local parser = (branch + node + space)^0 | |
function thirddata.sgf.parseb(str) | function thirddata.sgf.parseb(str) | ||
Line 88: | Line 88: | ||
\starttexdefinition sgf!start #1 | \starttexdefinition sgf!start #1 | ||
− | \par \advance\leftskip+2em | + | \par \advance\leftskip by +2em |
\stoptexdefinition | \stoptexdefinition | ||
\starttexdefinition sgf!stop #1 | \starttexdefinition sgf!stop #1 | ||
− | \par \advance\leftskip-2em | + | \par \advance\leftskip by -2em |
\stoptexdefinition | \stoptexdefinition | ||
\starttexdefinition sgf!node #1 | \starttexdefinition sgf!node #1 | ||
− | + | Node: #1\par | |
\stoptexdefinition | \stoptexdefinition | ||
\starttexdefinition sgf!white #1 | \starttexdefinition sgf!white #1 | ||
− | + | White: #1\par | |
\stoptexdefinition | \stoptexdefinition | ||
\starttexdefinition sgf!black #1 | \starttexdefinition sgf!black #1 | ||
− | + | Black: #1\par | |
\stoptexdefinition | \stoptexdefinition | ||
Line 136: | Line 136: | ||
local nodes = { } | local nodes = { } | ||
− | function nodes.B (x) command("black" | + | function nodes.B (x) command("black" ,x) end |
− | function nodes.W (x) command("white" | + | function nodes.W (x) command("white" ,x) end |
− | function nodes.AW(x) command("addwhite" ,x) end | + | function nodes.AW(x) command("addwhite",x) end |
− | function nodes.C (x) command("comment" | + | function nodes.C (x) command("comment" ,x) end |
local function action(what,data) | local function action(what,data) | ||
local a = nodes[what] | local a = nodes[what] | ||
if a then | if a then | ||
− | + | for w in string.gmatch(data,"%b[]") do | |
− | + | a(string.sub(w,2,-2)) | |
end | end | ||
else | else | ||
Line 152: | Line 152: | ||
end | end | ||
− | + | local function nodecontent(str) | |
tex.sprint(tex.ctxcatcodes,string.format("\\csname sgf!node\\endcsname{%s}",string.sub(str,2))) | tex.sprint(tex.ctxcatcodes,string.format("\\csname sgf!node\\endcsname{%s}",string.sub(str,2))) | ||
− | + | end | |
− | + | local space = lpeg.S(' \r\n')^1 | |
− | + | local lcletter = lpeg.R("az") | |
− | + | local ucletter = lpeg.R("AZ") | |
− | + | local letter = lcletter + ucletter | |
− | + | local propindent = ucletter^1 | |
− | + | local property = lpeg.C(propindent) * lpeg.C{ (lpeg.P("[") * (1 - lpeg.S"[]")^0 * lpeg.P("]"))^1} / action | |
− | + | local function nest(str) | |
− | + | tex.sprint(tex.ctxcatcodes,string.format("\\parsesgf{%s}",string.sub(str,2,-2))) | |
− | + | end | |
− | + | local node = lpeg.P{ ";" * (propindent * (lpeg.P("[") * (1 - lpeg.S"[]")^0 * lpeg.P("]"))^1)^1} / nodecontent | |
− | + | local branch = lpeg.P{ "(" * ((1 - lpeg.S"()") + lpeg.V(1))^0 * ")" } / nest | |
− | + | local parser = (branch + node + property + space)^0 | |
− | + | function thirddata.sgf.parse(str) | |
− | + | parser:match(str) | |
− | + | end | |
end | end | ||
Line 189: | Line 189: | ||
\starttexdefinition parsesgf #1 | \starttexdefinition parsesgf #1 | ||
− | + | \par \advance\leftskip by +2em | |
− | + | \sgfflush{#1} | |
− | + | \par \advance\leftskip by -2em | |
\stoptexdefinition | \stoptexdefinition | ||
\starttexdefinition sgf!node #1 | \starttexdefinition sgf!node #1 | ||
− | + | \par \sgfflush{#1} | |
\stoptexdefinition | \stoptexdefinition | ||
\starttexdefinition sgf!white #1 | \starttexdefinition sgf!white #1 | ||
− | + | White: #1\quad | |
\stoptexdefinition | \stoptexdefinition | ||
\starttexdefinition sgf!black #1 | \starttexdefinition sgf!black #1 | ||
− | + | Black: #1\quad | |
\stoptexdefinition | \stoptexdefinition | ||
\starttexdefinition sgf!addwhite #1 | \starttexdefinition sgf!addwhite #1 | ||
− | + | Add White: #1\quad | |
\stoptexdefinition | \stoptexdefinition | ||
\starttexdefinition sgf!comment #1 | \starttexdefinition sgf!comment #1 | ||
− | + | Comment: #1\quad | |
\stoptexdefinition | \stoptexdefinition | ||
Line 226: | Line 226: | ||
* http://www.inf.puc-rio.br/~roberto/lpeg.html | * http://www.inf.puc-rio.br/~roberto/lpeg.html | ||
+ | * http://www.red-bean.com/sgf/ | ||
+ | * http://www.gammon.com.au/lpeg | ||
+ | * https://en.wikipedia.org/wiki/Parsing_expression_grammar | ||
+ | |||
+ | [[Category:Sample documents]] | ||
+ | [[Category:Programming and Databases]] |
Latest revision as of 13:06, 7 January 2021
Contents
Writing a parser with LPeg
To parse input files for use with TeX you can either write Lua code by hand that converts the input and passes it to TeX, or use LPeg to parse the files in a simpler manner.
First version of an SGF parser
The code below reads input in SGF syntax and passes the content to TeX macros.
\startluacode
do
    -- Namespace tables; each is created only when it does not exist yet.
    thirddata     = thirddata     or { }
    thirddata.sgf = thirddata.sgf or { }

    -- Branch depth used by parsea. It is local to this block so that the
    -- parser neither creates nor overwrites a global variable named nesting.
    local nesting = 0

    -- Print a call of the TeX macro named sgf!NAME with x as its argument.
    local function command(name,x)
        tex.sprint(tex.texcatcodes,string.format("\\csname sgf!%s\\endcsname{%s}",name,x))
    end

    -- Handlers, indexed by the property letter captured from the input.
    local nodes = { }

    function nodes.B(x) command("black",x) end
    function nodes.W(x) command("white",x) end

    -- Called with the two captures of a node: the property letter (what)
    -- and the text between the square brackets (data).
    local function action(what,data)
        local handler = nodes[what]
        if handler then
            handler(data)
        else
            print("unknown action: " .. what)
        end
    end

    -- Opening parenthesis: go one level deeper and report the new depth.
    local function start()
        nesting = nesting + 1
        command("start",nesting)
    end

    -- Closing parenthesis: report the current depth, then go one level up.
    local function stop()
        command("stop",nesting)
        nesting = nesting - 1
    end

    local space    = lpeg.S(' \r\n')^1
    local lower    = lpeg.R("az")
    local upper    = lpeg.R("AZ")
    local letter   = lower + upper
    local position = letter^2
    local left     = lpeg.P("(")
    local right    = lpeg.P(")")
    local none     = 1 - (left + right)

    -- A node is a semicolon, the letter B or W, and two or more letters in
    -- square brackets. Both parsers below share this one pattern.
    local node = (lpeg.P(";") * lpeg.C(lpeg.S("BW")) * lpeg.P("[") * lpeg.C(position) * lpeg.P("]")) / action

    -- Variant a: branches are handled recursively inside the grammar, with
    -- start and stop attached to the parentheses.
    local branch_a = lpeg.P { left/start * (lpeg.V(1) + node + space)^0 * right/stop }
    local parser_a = (branch_a + node + space)^0

    function thirddata.sgf.parsea(str)
        nesting = 0
        parser_a:match(str)
    end

    -- Variant b: every run of text without parentheses inside a branch is
    -- handed back to TeX through the parsesgfb macro.
    local function nest(str)
        tex.sprint(tex.ctxcatcodes,string.format("\\parsesgfb{%s}",str))
    end

    local branch_b = lpeg.P { left * (lpeg.V(1) + (none^1/nest) + space)^0 * right }
    local parser_b = (branch_b + node + space)^0

    -- Variant b never calls start or stop, so it has no depth to reset.
    function thirddata.sgf.parseb(str)
        parser_b:match(str)
    end
end
\stopluacode
To work with the output from the parser we need the following TeX macros.
% Entry points: pass the argument to the Lua parsers as a double-quoted Lua string.
\long\def\parsesgfa#1{\ctxlua{thirddata.sgf.parsea("#1")}}
\long\def\parsesgfb#1{\ctxlua{thirddata.sgf.parseb("#1")}}

% Ends the paragraph and increases the left skip by 2em; the argument is not used.
\starttexdefinition sgf!start #1
    \par \advance\leftskip by +2em
\stoptexdefinition

% Ends the paragraph and decreases the left skip by 2em; the argument is not used.
\starttexdefinition sgf!stop #1
    \par \advance\leftskip by -2em
\stoptexdefinition

% Typesets the argument after the label "Node:" and ends the paragraph.
\starttexdefinition sgf!node #1
    Node: #1\par
\stoptexdefinition

% Typesets the argument after the label "White:" and ends the paragraph.
\starttexdefinition sgf!white #1
    White: #1\par
\stoptexdefinition

% Typesets the argument after the label "Black:" and ends the paragraph.
\starttexdefinition sgf!black #1
    Black: #1\par
\stoptexdefinition

% NOTE(review): no matching \unprotect is visible in this snippet -- confirm that \protect is needed here.
\protect

% Sample document: the same SGF input is run through both parser variants.
\starttext
    \parsesgfa{(;B[aa];W[bb](;B[cc](;B[dd];W[ee](;B[ff]);W[gg]));W[hh])}
    \blank
    \parsesgfb{(;B[aa];W[bb](;B[cc](;B[dd];W[ee](;B[ff]);W[gg]));W[hh])}
\stoptext
An extended version
The second version of the SGF parser is less restrictive about the arguments of the commands in the input, and it delimits the arguments in a slightly different way than the first version.
\startluacode
do
    -- Namespace tables; each is created only when it does not exist yet.
    thirddata     = thirddata     or { }
    thirddata.sgf = thirddata.sgf or { }

    -- Print a call of the TeX macro named sgf!NAME with x as its argument.
    local function command(name,x)
        tex.sprint(tex.texcatcodes,string.format("\\csname sgf!%s\\endcsname{%s}",name,x))
    end

    -- Handlers, indexed by the property identifier captured from the input.
    local nodes = { }

    function nodes.B (x) command("black"   ,x) end
    function nodes.W (x) command("white"   ,x) end
    function nodes.AW(x) command("addwhite",x) end
    function nodes.C (x) command("comment" ,x) end

    -- Called with a property identifier (what) and the text of all its
    -- bracketed values (data). The handler runs once per bracketed value,
    -- with the surrounding brackets removed.
    local function action(what,data)
        local handler = nodes[what]
        if handler then
            for value in string.gmatch(data,"%b[]") do
                handler(string.sub(value,2,-2))
            end
        else
            print("unknown action: " .. what)
        end
    end

    -- Receives a whole node including its leading semicolon and hands the
    -- rest to the TeX macro sgf!node.
    local function nodecontent(str)
        tex.sprint(tex.ctxcatcodes,string.format("\\csname sgf!node\\endcsname{%s}",string.sub(str,2)))
    end

    -- Receives a whole branch including both parentheses and hands the
    -- inner text to the TeX macro parsesgf.
    local function nest(str)
        tex.sprint(tex.ctxcatcodes,string.format("\\parsesgf{%s}",string.sub(str,2,-2)))
    end

    local space     = lpeg.S(' \r\n')^1
    local ucletter  = lpeg.R("AZ")

    -- Property identifier: one or more uppercase letters.
    local propident = ucletter^1

    -- Property value: square brackets around text that contains no square
    -- bracket. Defined once and shared by property and node.
    local propvalue = lpeg.P("[") * (1 - lpeg.S("[]"))^0 * lpeg.P("]")

    -- Two captures, the identifier and the run of values, both go to action.
    local property  = (lpeg.C(propident) * lpeg.C(propvalue^1)) / action

    -- No inner captures here, so nodecontent gets the whole matched text.
    local node      = (lpeg.P(";") * (propident * propvalue^1)^1) / nodecontent

    -- Balanced parentheses, matched recursively; nest gets the whole match.
    local branch    = lpeg.P { "(" * ((1 - lpeg.S("()")) + lpeg.V(1))^0 * ")" } / nest

    local parser    = (branch + node + property + space)^0

    function thirddata.sgf.parse(str)
        parser:match(str)
    end
end
\stopluacode
The TeX code for the second version is nearly the same as in the first draft. This was the goal for both versions, because it is more important to keep the TeX interface stable and to make the modifications in low-level TeX or Lua code.
% Passes the argument to the Lua parser as a double-quoted Lua string.
\def\sgfflush#1{\ctxlua{thirddata.sgf.parse("#1")}}

% Parses the argument with the left skip increased by 2em, then restores the skip.
\starttexdefinition parsesgf #1
    \par \advance\leftskip by +2em
    \sgfflush{#1}
    \par \advance\leftskip by -2em
\stoptexdefinition

% Ends the paragraph and parses the argument.
\starttexdefinition sgf!node #1
    \par \sgfflush{#1}
\stoptexdefinition

% Typesets the argument after the label "White:" followed by a quad of space.
\starttexdefinition sgf!white #1
    White: #1\quad
\stoptexdefinition

% Typesets the argument after the label "Black:" followed by a quad of space.
\starttexdefinition sgf!black #1
    Black: #1\quad
\stoptexdefinition

% Typesets the argument after the label "Add White:" followed by a quad of space.
\starttexdefinition sgf!addwhite #1
    Add White: #1\quad
\stoptexdefinition

% Typesets the argument after the label "Comment:" followed by a quad of space.
\starttexdefinition sgf!comment #1
    Comment: #1\quad
\stoptexdefinition

% NOTE(review): no matching \unprotect is visible in this snippet -- confirm that \protect is needed here.
\protect

% Sample document.
\starttext
    \parsesgf{(;C[First move in game 123]B[aa]AW[aa];W[bb](;B[cc](;B[dd];W[ee][ff](;B[ff]);W[gg]));W[hh])}
\stoptext