ref: 8cd7b36a0dc1aed76ab0a367267e210966668fbb
parent: 0a7866820744ad1a9ca2b93951da29ba5da809ee
author: Andrew Chambers <ac@acha.ninja>
date: Sun Apr 10 15:28:30 CDT 2022
Work on website.
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,7 @@
install: $(BINDIR) $(BINDIR)/leg $(MANDIR) $(MANDIR)/peg.1
mkdir -p $(MANDIR) $(BINDIR)
cp minipeg $(BINDIR)
- cp minipeg.1 $(MANDIR)
+ cp docs/minipeg.1 $(MANDIR)
$(MANDIR) :
mkdir -p $(MANDIR)
@@ -35,6 +35,9 @@
peg-amalg.c: peg.peg minipeg-amalg
./minipeg-amalg -o $@ peg.peg
+docs/index.html: .FORCE
+ $(SHELL) -c '(cd docs ; sh ./index.html.sh)' > $@
+
# Check the pregenerated peg.c matches the built peg-new.c.
# We also check peg-amalg.c to test our amalgamation process.
check-self-host: peg.c peg-new.c peg-amalg.c .FORCE
@@ -45,7 +48,7 @@
$(SHELL) -ec '(cd examples; $(MAKE))'
clean : .FORCE
- rm -f minipeg minipeg-split $(GENSRC) $(OBJ)
+ rm -f minipeg minipeg-amalg $(GENSRC) $(OBJ)
$(SHELL) -ec '(cd examples; $(MAKE) clean)'
.FORCE :
--- a/compile.c
+++ b/compile.c
@@ -119,7 +119,7 @@
static void label(int n) { fprintf(output, "\n l%d:;\t", n); }
static void jump(int n) { fprintf(output, " goto l%d;", n); }
static void save(int n) { fprintf(output, " int yypos%d= yy->__pos, yythunkpos%d= yy->__thunkpos;", n, n); }
-static void restore(int n) { fprintf(output, " yy->__pos= yypos%d; yy->__thunkpos= yythunkpos%d;", n, n); }
+static void restore(int n) { fprintf(output, " yy->__pos= yypos%d; yy->__thunkpos= yythunkpos%d;", n, n); }
static void Node_compile_c_ko(Node *node, int ko)
{
--- /dev/null
+++ b/docs/index.html
@@ -1,0 +1,787 @@
+<!doctype html>
+<meta name=viewport content="width=device-width, initial-scale=1">
+<link rel="stylesheet" href="./simple.css" type="text/css" />
+
+<title>Minipeg</title>
+<body>
+<div class="container">
+<h1><img id="logo" src="logo.png" alt>Minipeg</h1>
+<p>
+Minipeg is a parser generator for C you can easily add
+to your project as a single file.
+</p>
+
+<ul>
+<li>Single file distribution <a href="">minipeg.c</a></li>
+<li>Example <a href="">calculator</a></li>
+<li>Example <a href="https://github.com/andrewchambers/minias/blob/master/asm.peg">x86_64 assembler</a></li>
+<li><a href="">Source repository</a></li>
+</ul>
+
+<h2>Man Page</h2>
+<pre>
+MINIPEG(1) General Commands Manual MINIPEG(1)
+
+NAME
+ minipeg - parser generator
+
+SYNOPSIS
+ minipeg [-hvVP -ooutput] [filename ...]
+
+DESCRIPTION
+ minipeg is a tool for generating recursive-descent parsers: programs
+ that perform pattern matching on text. They process a Parsing
+ Expression Grammar (PEG) [Ford 2004] to produce a program that
+ recognises legal sentences of that grammar. minipeg processes PEGs
+ written with syntax and conventions that are intended to make it an
+ attractive replacement for parsers built with lex(1) and yacc(1).
+ Unlike lex and yacc, minipeg supports unlimited backtracking, provides
+ ordered choice as a means for disambiguation, and can combine scanning
+ (lexical analysis) and parsing (syntactic analysis) into a single
+ activity.
+
+ minipeg reads the specified filenames, or standard input if no
+ filenames are given, for a grammar describing the parser to generate.
+ minipeg then generates a C source file that defines a function
+ yyparse(). This C source file can be included in, or compiled and then
+ linked with, a client program. Each time the client program calls
+ yyparse() the parser consumes input text according to the parsing
+ rules, starting from the first rule in the grammar. yyparse() returns
+ non-zero if the input could be parsed according to the grammar; it
+ returns zero if the input could not be parsed.
+
+ The prefix 'yy' or 'YY' is prepended to all externally-visible symbols
+ in the generated parser. This is intended to reduce the risk of
+ namespace pollution in client programs. (The choice of 'yy' is
+ historical; see lex(1) and yacc(1), for example.)
+
+OPTIONS
+ minipeg provides the following options:
+
+ -h prints a summary of available options and then exits.
+
+ -ooutput
+ writes the generated parser to the file output instead of the
+ standard output.
+
+ -P suppresses #line directives in the output.
+
+ -v writes verbose information to standard error while working.
+
+ -V writes version information to standard error then exits.
+
+EXAMPLE: A CALCULATOR
+ Here we show a simple desk calculator supporting the four common
+ arithmetic operators and named variables. The intermediate results of
+ arithmetic evaluation will be accumulated on an implicit stack by
+ returning them as semantic values from sub-rules.
+
+ %{
+ #include &lt;stdio.h&gt; /* printf() */
+ #include &lt;stdlib.h&gt; /* atoi() */
+ int vars[26];
+ %}
+
+ Stmt = - e:Expr EOL { printf("%d\n", e); }
+ | ( !EOL . )* EOL { printf("error\n"); }
+
+ Expr = i:ID ASSIGN s:Sum { $$ = vars[i] = s; }
+ | s:Sum { $$ = s; }
+
+ Sum = l:Product
+ ( PLUS r:Product { l += r; }
+ | MINUS r:Product { l -= r; }
+ )* { $$ = l; }
+
+ Product = l:Value
+ ( TIMES r:Value { l *= r; }
+ | DIVIDE r:Value { l /= r; }
+ )* { $$ = l; }
+
+ Value = i:NUMBER { $$ = atoi(yytext); }
+ | i:ID !ASSIGN { $$ = vars[i]; }
+ | OPEN i:Expr CLOSE { $$ = i; }
+
+ NUMBER = < [0-9]+ > - { $$ = atoi(yytext); }
+ ID = < [a-z] > - { $$ = yytext[0] - 'a'; }
+ ASSIGN = '=' -
+ PLUS = '+' -
+ MINUS = '-' -
+ TIMES = '*' -
+ DIVIDE = '/' -
+ OPEN = '(' -
+ CLOSE = ')' -
+
+ - = [ \t]*
+ EOL = '\n' | '\r\n' | '\r' | ';'
+
+ %%
+
+ int main()
+ {
+ while (yyparse())
+ ;
+ return 0;
+ }
+
+ If the above grammar is placed in the file calc.peg, running the
+ command
+
+ $ minipeg -o calc.c calc.peg
+
+ will save the corresponding parser in the file calc.c. The program can
+ then be compiled with a C compiler and run
+
+ $ cc -o calc calc.c
+ $ ./calc
+ a=5
+ 5
+ a+5
+ 10
+
+
+MINIPEG GRAMMARS
+ A grammar consists of a set of named rules.
+
+ name = pattern
+
+ The pattern contains one or more of the following elements.
+
+ name The element stands for the entire pattern in the rule with the
+ given name.
+
+ "characters"
+ A character or string enclosed in double quotes is matched
+ literally. The ANSI C escape sequences are recognised within
+ the characters.
+
+ 'characters'
+ A character or string enclosed in single quotes is matched
+ literally, as above.
+
+ [characters]
+ A set of characters enclosed in square brackets matches any
+ single character from the set, with escape characters recognised
+ as above. If the set begins with an uparrow (^) then the set is
+ negated (the element matches any character not in the set). Any
+ pair of characters separated with a dash (-) represents the
+ range of characters from the first to the second, inclusive. A
+ single alphabetic character or underscore is matched by the
+ following set.
+
+ [a-zA-Z_]
+
+ Similarly, the following matches any single non-digit
+ character.
+
+ [^0-9]
+
+
+ . A dot matches any character. Note that the only time this fails
+ is at the end of file, where there is no character to match.
+
+ ( pattern )
+ Parentheses are used for grouping (modifying the precedence of
+ the operators described below).
+
+ { action }
+ Curly braces surround actions. The action is arbitrary C source
+ code to be executed at the end of matching. Any braces within
+ the action must be properly nested. Any input text that was
+ matched before the action and delimited by angle brackets (see
+ below) is made available within the action as the contents of
+ the character array yytext. The length of (number of characters
+ in) yytext is available in the variable yyleng. (These variable
+ names are historical; see lex(1).)
+
+ @{ action }
+ Actions prefixed with an 'at' symbol will be performed during
+ parsing, at the time they are encountered while matching the
+ input text with a rule. Because of back-tracking in the PEG
+ parsing algorithm, actions prefixed with '@' might be performed
+ multiple times for the same input text. (The usual behaviour of
+ actions is that they are saved up until matching is complete,
+ and then those that are part of the final derivation are
+ performed in left-to-right order.) The variable yytext is
+ available within these actions.
+
+ exp ~ { action }
+ A postfix operator ~{ action } can be placed after any
+ expression and will behave like a normal action (arbitrary C
+ code) except that it is invoked only when exp fails. It binds
+ less tightly than any other operator except alternation and
+ sequencing, and is intended to make error handling and recovery
+ code easier to write. Note that yytext and yyleng are not
+ available inside these actions, but the pointer variable yy is
+ available to give the code access to any user-defined members of
+ the parser state (see "CUSTOMISING THE PARSER" below). Note
+ also that exp is always a single expression; to invoke an error
+ action for any failure within a sequence, parentheses must be
+ used to group the sequence into a single expression.
+
+ rule = e1 e2 e3 ~{ error("e[12] ok; e3 has failed"); }
+ | ...
+
+ rule = (e1 e2 e3) ~{ error("one of e[123] has failed"); }
+ | ...
+
+ < An opening angle bracket always matches (consuming no input) and
+ causes the parser to begin accumulating matched text. This text
+ will be made available to actions in the variable yytext.
+
+ > A closing angle bracket always matches (consuming no input) and
+ causes the parser to stop accumulating text for yytext.
+
+ The above elements can be made optional and/or repeatable with the
+ following suffixes:
+
+ element ?
+ The element is optional. If present on the input, it is
+ consumed and the match succeeds. If not present on the input,
+ no text is consumed and the match succeeds anyway.
+
+ element +
+ The element is repeatable. If present on the input, one or more
+ occurrences of element are consumed and the match succeeds. If
+ no occurrences of element are present on the input, the match
+ fails.
+
+ element *
+ The element is optional and repeatable. If present on the
+ input, one or more occurrences of element are consumed and the
+ match succeeds. If no occurrences of element are present on the
+ input, the match succeeds anyway.
+
+ The above elements and suffixes can be converted into predicates (that
+ match arbitrary input text and subsequently succeed or fail without
+ consuming that input) with the following prefixes:
+
+ & element
+ The predicate succeeds only if element can be matched. Input
+ text scanned while matching element is not consumed from the
+ input and remains available for subsequent matching.
+
+ ! element
+ The predicate succeeds only if element cannot be matched. Input
+ text scanned while matching element is not consumed from the
+ input and remains available for subsequent matching. A popular
+ idiom is
+
+ !.
+
+ which matches the end of file, after the last character of the
+ input has already been consumed.
+
+ A special form of the '&' predicate is provided:
+
+ &{ expression }
+ In this predicate the simple C expression (not statement) is
+ evaluated immediately when the parser reaches the predicate. If
+ the expression yields non-zero (true) the 'match' succeeds and
+ the parser continues with the next element in the pattern. If
+ the expression yields zero (false) the 'match' fails and the
+ parser backs up to look for an alternative parse of the input.
+
+ Several elements (with or without prefixes and suffixes) can be
+ combined into a sequence by writing them one after the other. The
+ entire sequence matches only if each individual element within it
+ matches, from left to right.
+
+ Sequences can be separated into disjoint alternatives by the
+ alternation operator '|'.
+
+ sequence-1 | sequence-2 | ... | sequence-N
+ Each sequence is tried in turn until one of them matches, at
+ which time matching for the overall pattern succeeds. If none
+ of the sequences matches then the match of the overall pattern
+ fails.
+
+ The following elements can appear in addition to rules.
+
+ %{ text... %}
+ A declaration section can appear anywhere that a rule definition
+ is expected. The text between the delimiters '%{' and '%}' is
+ copied verbatim to the generated C parser code before the code
+ that implements the parser itself.
+
+ The pound sign (#) introduces a comment (discarded) that continues
+ until the end of the line.
+
+ %% text...
+ A double percent '%%' terminates the rules (and declarations)
+ section of the grammar. All text following '%%' is copied
+ verbatim to the generated C parser code after the parser
+ implementation code.
+
+ Some notes regarding rules and patterns follow.
+
+ rule-name Hyphens can appear as letters in the names of rules. Each
+ hyphen is converted into an underscore in the generated C source code.
+ A single hyphen '-' is a legal rule name.
+
+ Within actions you can access and manipulate named values.
+
+ $$ = value
+ A sub-rule can return a semantic value from an action by
+ assigning it to the pseudo-variable '$$'. All semantic values
+ must have the same type (which defaults to 'int'). This type
+ can be changed by defining YYSTYPE in a declaration section.
+
+ identifier:name
+ The semantic value returned (by assigning to '$$') from the
+ sub-rule name is associated with the identifier and can be
+ referred to in subsequent actions.
+
+MINIPEG GRAMMAR FOR MINIPEG GRAMMARS
+ The grammar for minipeg grammars is shown below. This will both
+ illustrate and formalise the above description.
+
+ grammar = -
+ ( declaration | definition )+
+ trailer? end-of-file
+
+ declaration = '%{' < ( !'%}' . )* > RPERCENT
+
+ trailer = '%%' < .* >
+
+ definition = identifier EQUAL expression
+
+ expression = sequence ( BAR sequence )*
+
+ sequence = error+
+
+ error = prefix ( TILDE action )?
+
+ prefix = AND action
+ | ( AND | NOT )? suffix
+
+ suffix = primary ( QUERY | STAR | PLUS )?
+
+ primary = identifier COLON identifier !EQUAL
+ | identifier !EQUAL
+ | OPEN expression CLOSE
+ | literal
+ | class
+ | DOT
+ | action
+ | BEGIN
+ | END
+
+ identifier = < [-a-zA-Z_][-a-zA-Z_0-9]* > -
+
+ literal = ['] < ( !['] char )* > ['] -
+ | ["] < ( !["] char )* > ["] -
+
+ class = '[' < ( !']' range )* > ']' -
+
+ range = char '-' char | char
+
+ char = '\\' [abefnrtv'"\[\]\\]
+ | '\\' [0-3][0-7][0-7]
+ | '\\' [0-7][0-7]?
+ | !'\\' .
+
+ action = '{' < braces* > '}' -
+
+ braces = '{' braces* '}'
+ | !'}' .
+
+ EQUAL = '=' -
+ COLON = ':' -
+ BAR = '|' -
+ AND = '&' -
+ NOT = '!' -
+ QUERY = '?' -
+ STAR = '*' -
+ PLUS = '+' -
+ OPEN = '(' -
+ CLOSE = ')' -
+ DOT = '.' -
+ BEGIN = '<' -
+ END = '>' -
+ TILDE = '~' -
+ RPERCENT = '%}' -
+
+ - = ( space | comment )*
+ space = ' ' | '\t' | end-of-line
+ comment = '#' ( !end-of-line . )* end-of-line
+ end-of-line = '\r\n' | '\n' | '\r'
+ end-of-file = !.
+
+
+CUSTOMISING THE PARSER
+ The following symbols can be redefined in declaration sections to
+ modify the generated parser code.
+
+ YYSTYPE
+ The semantic value type. The pseudo-variable '$$' and the
+ identifiers 'bound' to rule results with the colon operator ':'
+ should all be considered as being declared to have this type.
+ The default value is 'int'.
+
+ YYPARSE
+ The name of the main entry point to the parser. The default
+ value is 'yyparse'.
+
+ YYPARSEFROM
+ The name of an alternative entry point to the parser. This
+ function expects one argument: the function corresponding to the
+ rule from which the search for a match should begin. The
+ default is 'yyparsefrom'. Note that yyparse() is defined as
+
+ int yyparse() { return yyparsefrom(yy_foo); }
+
+ where 'foo' is the name of the first rule in the grammar.
+
+ YY_INPUT(buf, result, max_size)
+ This macro is invoked by the parser to obtain more input text.
+ buf points to an area of memory that can hold at most max_size
+ characters. The macro should copy input text to buf and then
+ assign the integer variable result to indicate the number of
+ characters copied. If no more input is available, the macro
+ should assign 0 to result. By default, the YY_INPUT macro is
+ defined as follows.
+
+ #define YY_INPUT(buf, result, max_size) \
+ { \
+ int yyc= getchar(); \
+ result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \
+ }
+
+ Note that if YY_CTX_LOCAL is defined (see below) then an
+ additional first argument, containing the parser context, is
+ passed to YY_INPUT.
+
+ YY_DEBUG
+ If this symbol is defined then additional code will be included
+ in the parser that prints vast quantities of arcane information
+ to the standard error while the parser is running.
+
+ YY_BEGIN
+ This macro is invoked to mark the start of input text that will
+ be made available in actions as 'yytext'. This corresponds to
+ occurrences of '<' in the grammar. These are converted into
+ predicates that are expected to succeed. The default definition
+
+ #define YY_BEGIN (yybegin= yypos, 1)
+
+ therefore saves the current input position and returns 1
+ ('true') as the result of the predicate.
+
+ YY_END This macro corresponds to '>' in the grammar. Again, it is a
+ predicate so the default definition saves the input position
+ before 'succeeding'.
+
+ #define YY_END (yyend= yypos, 1)
+
+
+ YY_PARSE(T)
+ This macro declares the parser entry points (yyparse and
+ yyparsefrom) to be of type T. The default definition
+
+ #define YY_PARSE(T) T
+
+ leaves yyparse() and yyparsefrom() with global visibility. If
+ they should not be externally visible in other source files,
+ this macro can be redefined to declare them 'static'.
+
+ #define YY_PARSE(T) static T
+
+
+ YY_CTX_LOCAL
+ If this symbol is defined during compilation of a generated
+ parser then global parser state will be kept in a structure of
+ type 'yycontext' which can be declared as a local variable.
+ This allows multiple instances of parsers to coexist and to be
+ thread-safe. The parsing function yyparse() will be declared to
+ expect a first argument of type 'yycontext *', an instance of
+ the structure holding the global state for the parser. This
+ instance must be allocated and initialised to zero by the
+ client. A trivial but complete example is as follows.
+
+ #include &lt;stdio.h&gt;
+
+ #define YY_CTX_LOCAL
+
+ #include "the-generated-parser.peg.c"
+
+ int main()
+ {
+ yycontext ctx;
+ memset(&ctx, 0, sizeof(yycontext));
+ while (yyparse(&ctx));
+ return 0;
+ }
+
+ Note that if this symbol is undefined then the compiled parser
+ will statically allocate its global state and will be neither
+ reentrant nor thread-safe. Note also that the parser yycontext
+ structure is initialised automatically the first time yyparse()
+ is called; this structure must therefore be properly initialised
+ to zero before the first call to yyparse().
+
+ YY_CTX_MEMBERS
+ If YY_CTX_LOCAL is defined (see above) then the macro
+ YY_CTX_MEMBERS can be defined to expand to any additional member
+ field declarations that the client would like included in the
+ declaration of the 'yycontext' structure type. These additional
+ members are otherwise ignored by the generated parser. The
+ instance of 'yycontext' associated with the currently-active
+ parser is available within actions as the pointer variable yy.
+
+ YY_BUFFER_SIZE
+ The initial size of the text buffer, in bytes. The default is
+ 1024 and the buffer size is doubled whenever required to meet
+ demand during parsing. An application that typically parses
+ much longer strings could increase this to avoid unnecessary
+ buffer reallocation.
+
+ YY_STACK_SIZE
+ The initial size of the variable and action stacks. The default
+ is 128, which is doubled whenever required to meet demand during
+ parsing. Applications that have deep call stacks with many
+ local variables, or that perform many actions after a single
+ successful match, could increase this to avoid unnecessary
+ buffer reallocation.
+
+ YY_MALLOC(YY, SIZE)
+ The memory allocator for all parser-related storage. The
+ parameters are the current yycontext structure and the number of
+ bytes to allocate. The default definition is: malloc(SIZE)
+
+ YY_REALLOC(YY, PTR, SIZE)
+ The memory reallocator for dynamically-grown storage (such as
+ text buffers and variable stacks). The parameters are the
+ current yycontext structure, the previously-allocated storage,
+ and the number of bytes to which that storage should be grown.
+ The default definition is: realloc(PTR, SIZE)
+
+ YY_FREE(YY, PTR)
+ The memory deallocator. The parameters are the current
+ yycontext structure and the storage to deallocate. The default
+ definition is: free(PTR)
+
+ YYRELEASE
+ The name of the function that releases all resources held by a
+ yycontext structure. The default value is 'yyrelease'.
+
+ The following variables can be referred to within actions.
+
+ char *yybuf
+ This variable points to the parser's input buffer used to store
+ input text that has not yet been matched.
+
+ int yypos
+ This is the offset (in yybuf) of the next character to be
+ matched and consumed.
+
+ char *yytext
+ The most recent matched text delimited by '<' and '>' is stored
+ in this variable.
+
+ int yyleng
+ This variable indicates the number of characters in 'yytext'.
+
+ yycontext *yy
+ This variable points to the instance of 'yycontext' associated
+ with the currently-active parser.
+
+ Programs that wish to release all the resources associated with a
+ parser can use the following function.
+
+ yyrelease(yycontext*yy)
+ Returns all parser-allocated storage associated with yy to the
+ system. The storage will be reallocated on the next call to
+ yyparse().
+
+ Note that the storage for the yycontext structure itself is never
+ allocated or reclaimed implicitly. The application must allocate these
+ structures in automatic storage, or use calloc() and free() to manage
+ them explicitly. The example in the following section demonstrates one
+ approach to resource management.
+
+EXAMPLE: EXTENDING THE PARSER'S CONTEXT
+ The yy variable passed to actions contains the state of the parser plus
+ any additional fields defined by YY_CTX_MEMBERS. These fields can be
+ used to store application-specific information that is global to a
+ particular call of yyparse(). A trivial but complete minipeg example
+ follows in which the yycontext structure is extended with a count of
+ the number of newline characters seen in the input so far (the grammar
+ otherwise consumes and ignores the entire input). The caller of
+ yyparse() uses count to print the number of lines of input that were
+ read.
+
+
+ %{
+ #define YY_CTX_LOCAL 1
+ #define YY_CTX_MEMBERS \
+ int count;
+ %}
+
+ Char = ('\n' | '\r\n' | '\r') { yy->count++ }
+ | .
+
+ %%
+
+ #include &lt;stdio.h&gt;
+ #include &lt;string.h&gt;
+
+ int main()
+ {
+ /* create a local parser context in automatic storage */
+ yycontext yy;
+ /* the context *must* be initialised to zero before first use*/
+ memset(&yy, 0, sizeof(yy));
+
+ while (yyparse(&yy))
+ ;
+ printf("%d newlines\n", yy.count);
+
+ /* release all resources associated with the context */
+ yyrelease(&yy);
+
+ return 0;
+ }
+
+
+DIAGNOSTICS
+ minipeg warns about the following conditions while converting a grammar
+ into a parser.
+
+ syntax error
+ The input grammar was malformed in some way. The error message
+ will include the text about to be matched (often backed up a
+ huge amount from the actual location of the error) and the line
+ number of the most recently considered character (which is often
+ the real location of the problem).
+
+ rule 'foo' used but not defined
+ The grammar referred to a rule named 'foo' but no definition for
+ it was given. Attempting to use the generated parser will
+ likely result in errors from the linker due to undefined symbols
+ associated with the missing rule.
+
+ rule 'foo' defined but not used
+ The grammar defined a rule named 'foo' and then ignored it. The
+ code associated with the rule is included in the generated
+ parser which will in all other respects be healthy.
+
+ possible infinite left recursion in rule 'foo'
+ There exists at least one path through the grammar that leads
+ from the rule 'foo' back to (a recursive invocation of) the same
+ rule without consuming any input.
+
+ Left recursion, especially that found in standards documents, is often
+ 'direct' and implies trivial repetition.
+
+ # (6.7.6)
+ direct-abstract-declarator =
+ LPAREN abstract-declarator RPAREN
+ | direct-abstract-declarator? LBRACKET assign-expr? RBRACKET
+ | direct-abstract-declarator? LBRACKET STAR RBRACKET
+ | direct-abstract-declarator? LPAREN param-type-list? RPAREN
+
+ The recursion can easily be eliminated by converting the parts of the
+ pattern following the recursion into a repeatable suffix.
+
+ # (6.7.6)
+ direct-abstract-declarator =
+ direct-abstract-declarator-head?
+ direct-abstract-declarator-tail*
+
+ direct-abstract-declarator-head =
+ LPAREN abstract-declarator RPAREN
+
+ direct-abstract-declarator-tail =
+ LBRACKET assign-expr? RBRACKET
+ | LBRACKET STAR RBRACKET
+ | LPAREN param-type-list? RPAREN
+
+
+CAVEATS
+ A parser that accepts empty input will always succeed. Consider the
+ following example, not atypical of a first attempt to write a PEG-based
+ parser:
+
+ Program = Expression*
+ Expression = "whatever"
+ %%
+ int main() {
+ while (yyparse())
+ puts("success!");
+ return 0;
+ }
+
+ This program loops forever, no matter what (if any) input is provided
+ on stdin. Many fixes are possible, the easiest being to insist that
+ the parser always consumes some non-empty input. Changing the first
+ line to
+
+ Program = Expression+
+
+ accomplishes this. If the parser is expected to consume the entire
+ input, then explicitly requiring the end-of-file is also highly
+ recommended:
+
+ Program = Expression+ !.
+
+ This works because the parser will only fail to match ("!" predicate)
+ any character at all ("." expression) when it attempts to read beyond
+ the end of the input.
+
+BUGS
+ The 'yy' and 'YY' prefixes cannot be changed.
+
+ Left recursion is detected in the input grammar but is not handled
+ correctly in the generated parser.
+
+ Diagnostics for errors in the input grammar are obscure and not
+ particularly helpful.
+
+ The operators ! and ~ should really be named the other way around.
+
+ Several commonly-used lex(1) features (yywrap(), yyin, etc.) are
+ completely absent.
+
+ The generated parser does not contain '#line' directives to direct C
+ compiler errors back to the grammar description when appropriate.
+
+SEE ALSO
+ D. Val Schorre, META II, a syntax-oriented compiler writing language,
+ 19th ACM National Conference, 1964, pp. 41.301--41.311. Describes a
+ self-implementing parser generator for analytic grammars with no
+ backtracking.
+
+ Alexander Birman, The TMG Recognition Schema, Ph.D. dissertation,
+ Princeton, 1970. A mathematical treatment of the power and complexity
+ of recursive-descent parsing with backtracking.
+
+ Bryan Ford, Parsing Expression Grammars: A Recognition-Based Syntactic
+ Foundation, ACM SIGPLAN Symposium on Principles of Programming
+ Languages, 2004. Defines PEGs and analyses them in relation to
+ context-free and regular grammars. Introduces the syntax adopted in
+ peg.
+
+ The standard Unix utilities lex(1) and yacc(1) which influenced the
+ syntax and features of minipeg.
+
+ The source code for minipeg whose grammar parsers are written using
+ themselves.
+
+ The latest version of this software and documentation:
+
+ https://github.com/andrewchambers/minipeg
+
+
+AUTHOR
+ minipeg and this manual were originally written by Ian Piumarta under
+ the project name peg/leg. minipeg is a fork of peg/leg by Andrew
+ Chambers.
+
+ Please send bug reports and suggestions for improvements to the author
+ at the project address.
+
+ MINIPEG(1)
+</pre>
+</div>
+</body>
--- /dev/null
+++ b/docs/index.html.sh
@@ -1,0 +1,32 @@
+#!/bin/sh
+set -eu  # Prints the Minipeg home page (HTML) on stdout; stop on errors and unset variables.
+cat <<'EOF'  # quoted delimiter: the HTML below is emitted literally, never shell-expanded
+<!doctype html>
+<meta name=viewport content="width=device-width, initial-scale=1">
+<link rel="stylesheet" href="./simple.css" type="text/css" />
+
+<title>Minipeg</title>
+<body>
+<div class="container">
+<h1><img id="logo" src="logo.png" alt>Minipeg</h1>
+<p>
+Minipeg is a parser generator for C you can easily add
+to your project as a single file.
+</p>
+
+<ul>
+<li>Single file distribution <a href="">minipeg.c</a></li>
+<li>Example <a href="">calculator</a></li>
+<li>Example <a href="https://github.com/andrewchambers/minias/blob/master/asm.peg">x86_64 assembler</a></li>
+<li><a href="">Source repository</a></li>
+</ul>
+
+<h2>Man Page</h2>
+EOF
+echo "<pre>"
+mandoc -Tascii -Ofragment ./minipeg.1 | col -b | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'  # escape HTML metacharacters ('&' first) so text like <stdio.h> survives inside <pre>
+echo "</pre>"
+cat <<'EOF'
+</div>
+</body>
+EOF
binary files /dev/null b/docs/logo.png differ
--- /dev/null
+++ b/docs/minipeg.1
@@ -1,0 +1,915 @@
+.TH MINIPEG 1
+.SH NAME
+minipeg \- parser generator
+.SH SYNOPSIS
+.B minipeg
+.B [\-hvVP \-ooutput]
+.I [filename ...]
+.SH DESCRIPTION
+.I minipeg
+is a tool for generating recursive\-descent parsers: programs that
+perform pattern matching on text.  It processes a Parsing Expression
+Grammar (PEG) [Ford 2004] to produce a program that recognises legal
+sentences of that grammar.
+.I minipeg
+processes PEGs written with syntax and conventions
+that are intended to make it an attractive replacement for parsers
+built with
+.IR lex (1)
+and
+.IR yacc (1).
+Unlike
+.I lex
+and
+.IR yacc ,
+.I minipeg
+supports unlimited backtracking, provides ordered choice as a means for
+disambiguation, and can combine scanning (lexical analysis) and
+parsing (syntactic analysis) into a single activity.
+.PP
+.I minipeg
+reads the specified
+.IR filename s,
+or standard input if no
+.IR filename s
+are given, for a grammar describing the parser to generate.
+.I minipeg
+then generates a C source file that defines a function
+.IR yyparse ().
+This C source file can be included in, or compiled and then linked
+with, a client program. Each time the client program calls
+.IR yyparse ()
+the parser consumes input text according to the parsing rules,
+starting from the first rule in the grammar.
+.IR yyparse ()
+returns non\-zero if the input could be parsed according to the
+grammar; it returns zero if the input could not be parsed.
+.PP
+The prefix 'yy' or 'YY' is prepended to all externally\-visible symbols
+in the generated parser. This is intended to reduce the risk of
+namespace pollution in client programs. (The choice of 'yy' is
+historical; see
+.IR lex (1)
+and
+.IR yacc (1),
+for example.)
+.SH OPTIONS
+.I minipeg
+provides the following options:
+.TP
+.B \-h
+prints a summary of available options and then exits.
+.TP
+.B \-ooutput
+writes the generated parser to the file
+.B output
+instead of the standard output.
+.TP
+.B \-P
+suppresses #line directives in the output.
+.TP
+.B \-v
+writes verbose information to standard error while working.
+.TP
+.B \-V
+writes version information to standard error then exits.
+.SH EXAMPLE: A CALCULATOR
+Here we show a simple desk calculator supporting the four common arithmetic
+operators and named variables. The intermediate results of arithmetic
+evaluation will be accumulated on an implicit stack by returning them
+as semantic values from sub\-rules.
+.nf
+
+ %{
+ #include <stdio.h> /* printf() */
+ #include <stdlib.h> /* atoi() */
+ int vars[26];
+ %}
+
+ Stmt = \- e:Expr EOL { printf("%d\\n", e); }
+ | ( !EOL . )* EOL { printf("error\\n"); }
+
+ Expr = i:ID ASSIGN s:Sum { $$ = vars[i] = s; }
+ | s:Sum { $$ = s; }
+
+ Sum = l:Product
+ ( PLUS r:Product { l += r; }
+ | MINUS r:Product { l \-= r; }
+ )* { $$ = l; }
+
+ Product = l:Value
+ ( TIMES r:Value { l *= r; }
+ | DIVIDE r:Value { l /= r; }
+ )* { $$ = l; }
+
+ Value = i:NUMBER { $$ = atoi(yytext); }
+ | i:ID !ASSIGN { $$ = vars[i]; }
+ | OPEN i:Expr CLOSE { $$ = i; }
+
+ NUMBER = < [0\-9]+ > \- { $$ = atoi(yytext); }
+ ID = < [a\-z] > \- { $$ = yytext[0] \- 'a'; }
+ ASSIGN = '=' \-
+ PLUS = '+' \-
+ MINUS = '\-' \-
+ TIMES = '*' \-
+ DIVIDE = '/' \-
+ OPEN = '(' \-
+ CLOSE = ')' \-
+
+ \- = [ \\t]*
+ EOL = '\\n' | '\\r\\n' | '\\r' | ';'
+
+ %%
+
+ int main()
+ {
+ while (yyparse())
+ ;
+ return 0;
+ }
+.fi
+.PP
+If the above grammar is placed in the file
+.BR calc.peg ,
+running the command
+.nf
+
+ $ minipeg \-o calc.c calc.peg
+
+.fi
+will save the corresponding parser in the file
+.BR calc.c .
+The program can then be compiled with a C compiler and run
+.nf
+
+ $ cc \-o calc calc.c
+ $ ./calc
+ a=5
+ 5
+ a+5
+ 10
+.fi
+
+.SH MINIPEG GRAMMARS
+A grammar consists of a set of named rules.
+.nf
+
+ name = pattern
+
+.fi
+The
+.B pattern
+contains one or more of the following elements.
+.TP
+.B name
+The element stands for the entire pattern in the rule with the given
+.BR name .
+.TP
+.BR \(dq characters \(dq
+A character or string enclosed in double quotes is matched literally.
+The ANSI C escape sequences are recognised within the
+.IR characters .
+.TP
+.BR ' characters '
+A character or string enclosed in single quotes is matched literally, as above.
+.TP
+.BR [ characters ]
+A set of characters enclosed in square brackets matches any single
+character from the set, with escape characters recognised as above.
+If the set begins with an uparrow (^) then the set is negated (the
+element matches any character
+.I not
+in the set). Any pair of characters separated with a dash (\-)
+represents the range of characters from the first to the second,
+inclusive. A single alphabetic character or underscore is matched by
+the following set.
+.nf
+
+ [a\-zA\-Z_]
+
+.fi
+Similarly, the following matches any single non\-digit character.
+.nf
+
+ [^0\-9]
+
+.fi
+.TP
+.B .
+A dot matches any character. Note that the only time this fails is at
+the end of file, where there is no character to match.
+.TP
+.BR ( \ pattern\ )
+Parentheses are used for grouping (modifying the precedence of the
+operators described below).
+.TP
+.BR { \ action\ }
+Curly braces surround actions. The action is arbitrary C source code
+to be executed at the end of matching. Any braces within the action
+must be properly nested. Any input text that was matched before the
+action and delimited by angle brackets (see below) is made available
+within the action as the contents of the character array
+.IR yytext .
+The length of (number of characters in)
+.I yytext
+is available in the variable
+.IR yyleng .
+(These variable names are historical; see
+.IR lex (1).)
+.TP
+.IB @{\ action\ }
+Actions prefixed with an 'at' symbol will be performed during parsing,
+at the time they are encountered while matching the input text with a
+rule.
+Because of back-tracking in the PEG parsing algorithm, actions
+prefixed with '@' might be performed multiple times for the same input
+text.
+(The usual behaviour of actions is that they are saved up until
+matching is complete, and then those that are part of the
+final derivation are performed in left-to-right order.)
+The variable
+.I yytext
+is available within these actions.
+.TP
+.IB exp \ ~ \ {\ action\ }
+A postfix operator
+.BI ~ {\ action\ }
+can be placed after any expression and will behave like a normal
+action (arbitrary C code) except that it is invoked only when
+.I exp
+fails. It binds less tightly than any other operator except alternation and sequencing, and
+is intended to make error handling and recovery code easier to write.
+Note that
+.I yytext
+and
+.I yyleng
+are not available inside these actions, but the pointer variable
+.I yy
+is available to give the code access to any user\-defined members
+of the parser state (see "CUSTOMISING THE PARSER" below).
+Note also that
+.I exp
+is always a single expression; to invoke an error action for any
+failure within a sequence, parentheses must be used to group the
+sequence into a single expression.
+.nf
+
+ rule = e1 e2 e3 ~{ error("e[12] ok; e3 has failed"); }
+ | ...
+
+ rule = (e1 e2 e3) ~{ error("one of e[123] has failed"); }
+ | ...
+.fi
+.TP
+.B <
+An opening angle bracket always matches (consuming no input) and
+causes the parser to begin accumulating matched text. This text will
+be made available to actions in the variable
+.IR yytext .
+.TP
+.B >
+A closing angle bracket always matches (consuming no input) and causes
+the parser to stop accumulating text for
+.IR yytext .
+.PP
+The above
+.IR element s
+can be made optional and/or repeatable with the following suffixes:
+.TP
+.RB element\ ?
+The element is optional. If present on the input, it is consumed and
+the match succeeds. If not present on the input, no text is consumed
+and the match succeeds anyway.
+.TP
+.RB element\ +
+The element is repeatable. If present on the input, one or more
+occurrences of
+.I element
+are consumed and the match succeeds. If no occurrences of
+.I element
+are present on the input, the match fails.
+.TP
+.RB element\ *
+The element is optional and repeatable. If present on the input, one or more
+occurrences of
+.I element
+are consumed and the match succeeds. If no occurrences of
+.I element
+are present on the input, the match succeeds anyway.
+.PP
+The above elements and suffixes can be converted into predicates (that
+match arbitrary input text and subsequently succeed or fail
+.I without
+consuming that input) with the following prefixes:
+.TP
+.BR & \ element
+The predicate succeeds only if
+.I element
+can be matched. Input text scanned while matching
+.I element
+is not consumed from the input and remains available for subsequent
+matching.
+.TP
+.BR ! \ element
+The predicate succeeds only if
+.I element
+cannot be matched. Input text scanned while matching
+.I element
+is not consumed from the input and remains available for subsequent
+matching. A popular idiom is
+.nf
+
+ !.
+
+.fi
+which matches the end of file, after the last character of the input
+has already been consumed.
+.PP
+A special form of the '&' predicate is provided:
+.TP
+.BR & {\ expression\ }
+In this predicate the simple C
+.I expression
+.RB ( not
+statement) is evaluated immediately when the parser reaches the
+predicate. If the
+.I expression
+yields non\-zero (true) the 'match' succeeds and the parser continues
+with the next element in the pattern. If the
+.I expression
+yields zero (false) the 'match' fails and the parser backs up to look
+for an alternative parse of the input.
+.PP
+Several elements (with or without prefixes and suffixes) can be
+combined into a
+.I sequence
+by writing them one after the other. The entire sequence matches only
+if each individual element within it matches, from left to right.
+.PP
+Sequences can be separated into disjoint alternatives by the
+alternation operator '|'.
+.TP
+.RB sequence\-1\ | \ sequence\-2\ | \ ...\ | \ sequence\-N
+Each sequence is tried in turn until one of them matches, at which
+time matching for the overall pattern succeeds. If none of the
+sequences matches then the match of the overall pattern fails.
+.PP
+The following elements can appear in addition to rules.
+.TP
+.BI %{\ text... \ %}
+A declaration section can appear anywhere that a rule definition is
+expected. The
+.I text
+between the delimiters '%{' and '%}' is copied verbatim to the
+generated C parser code
+.I before
+the code that implements the parser itself.
+.PP
+The pound sign (#) introduces a comment (discarded) that
+continues until the end of the line.
+.TP
+.BI %% \ text...
+A double percent '%%' terminates the rules (and declarations) section of
+the grammar. All
+.I text
+following '%%' is copied verbatim to the generated C parser code
+.I after
+the parser implementation code.
+.PP
+Some notes regarding rules and patterns follow.
+.PP
+.B rule\-name
+Hyphens can appear as letters in the names of rules. Each hyphen is
+converted into an underscore in the generated C source code. A
+single hyphen '\-' is a legal rule name.
+.PP
+Within actions you can access and manipulate named values.
+.TP
+.BI $$\ = \ value
+A sub\-rule can return a semantic
+.I value
+from an action by assigning it to the pseudo\-variable '$$'. All
+semantic values must have the same type (which defaults to 'int').
+This type can be changed by defining YYSTYPE in a declaration section.
+.TP
+.IB identifier : name
+The semantic value returned (by assigning to '$$') from the sub\-rule
+.I name
+is associated with the
+.I identifier
+and can be referred to in subsequent actions.
+.SH MINIPEG GRAMMAR FOR MINIPEG GRAMMARS
+The grammar for
+.I minipeg
+grammars is shown below. This will both illustrate and formalise the
+above description.
+.nf
+
+ grammar = \-
+ ( declaration | definition )+
+ trailer? end\-of\-file
+
+ declaration = '%{' < ( !'%}' . )* > RPERCENT
+
+ trailer = '%%' < .* >
+
+ definition = identifier EQUAL expression
+
+ expression = sequence ( BAR sequence )*
+
+ sequence = error+
+
+ error = prefix ( TILDE action )?
+
+ prefix = AND action
+ | ( AND | NOT )? suffix
+
+ suffix = primary ( QUERY | STAR | PLUS )?
+
+ primary = identifier COLON identifier !EQUAL
+ | identifier !EQUAL
+ | OPEN expression CLOSE
+ | literal
+ | class
+ | DOT
+ | action
+ | BEGIN
+ | END
+
+ identifier = < [\-a\-zA\-Z_][\-a\-zA\-Z_0\-9]* > \-
+
+ literal = ['] < ( !['] char )* > ['] \-
+ | ["] < ( !["] char )* > ["] \-
+
+ class = '[' < ( !']' range )* > ']' \-
+
+ range = char '\-' char | char
+
+ char = '\\\\' [abefnrtv'"\\[\\]\\\\]
+ | '\\\\' [0\-3][0\-7][0\-7]
+ | '\\\\' [0\-7][0\-7]?
+ | !'\\\\' .
+
+ action = '{' < braces* > '}' \-
+
+ braces = '{' braces* '}'
+ | !'}' .
+
+ EQUAL = '=' \-
+ COLON = ':' \-
+ BAR = '|' \-
+ AND = '&' \-
+ NOT = '!' \-
+ QUERY = '?' \-
+ STAR = '*' \-
+ PLUS = '+' \-
+ OPEN = '(' \-
+ CLOSE = ')' \-
+ DOT = '.' \-
+ BEGIN = '<' \-
+ END = '>' \-
+ TILDE = '~' \-
+ RPERCENT = '%}' \-
+
+ \- = ( space | comment )*
+ space = ' ' | '\\t' | end\-of\-line
+ comment = '#' ( !end\-of\-line . )* end\-of\-line
+ end\-of\-line = '\\r\\n' | '\\n' | '\\r'
+ end\-of\-file = !.
+
+.fi
+.SH CUSTOMISING THE PARSER
+The following symbols can be redefined in declaration sections to
+modify the generated parser code.
+.TP
+.B YYSTYPE
+The semantic value type. The pseudo\-variable '$$' and the
+identifiers 'bound' to rule results with the colon operator ':' should
+all be considered as being declared to have this type. The default
+value is 'int'.
+.TP
+.B YYPARSE
+The name of the main entry point to the parser. The default value
+is 'yyparse'.
+.TP
+.B YYPARSEFROM
+The name of an alternative entry point to the parser. This function
+expects one argument: the function corresponding to the rule from
+which the search for a match should begin. The default
+is 'yyparsefrom'. Note that yyparse() is defined as
+.nf
+
+ int yyparse() { return yyparsefrom(yy_foo); }
+
+.fi
+where 'foo' is the name of the first rule in the grammar.
+.TP
+.BI YY_INPUT( buf , \ result , \ max_size )
+This macro is invoked by the parser to obtain more input text.
+.I buf
+points to an area of memory that can hold at most
+.I max_size
+characters. The macro should copy input text to
+.I buf
+and then assign the integer variable
+.I result
+to indicate the number of characters copied. If no more input is available,
+the macro should assign 0 to
+.IR result .
+By default, the YY_INPUT macro is defined as follows.
+.nf
+
+ #define YY_INPUT(buf, result, max_size) \\
+ { \\
+ int yyc= getchar(); \\
+ result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \\
+ }
+
+.fi
+Note that if YY_CTX_LOCAL is defined (see below) then an additional
+first argument, containing the parser context, is passed to YY_INPUT.
+.TP
+.B YY_DEBUG
+If this symbol is defined then additional code will be included in
+the parser that prints vast quantities of arcane information to the
+standard error while the parser is running.
+.TP
+.B YY_BEGIN
+This macro is invoked to mark the start of input text that will be
+made available in actions as 'yytext'. This corresponds to
+occurrences of '<' in the grammar. These are converted into
+predicates that are expected to succeed. The default definition
+.nf
+
+ #define YY_BEGIN (yybegin= yypos, 1)
+
+.fi
+therefore saves the current input position and returns 1 ('true') as
+the result of the predicate.
+.TP
+.B YY_END
+This macro corresponds to '>' in the grammar.  Again, it is a
+predicate so the default definition saves the input position
+before 'succeeding'.
+.nf
+
+ #define YY_END (yyend= yypos, 1)
+
+.fi
+.TP
+.BI YY_PARSE( T )
+This macro declares the parser entry points (yyparse and yyparsefrom)
+to be of type
+.IR T .
+The default definition
+.nf
+
+ #define YY_PARSE(T) T
+
+.fi
+leaves yyparse() and yyparsefrom() with global visibility. If they
+should not be externally visible in other source files, this macro can
+be redefined to declare them 'static'.
+.nf
+
+ #define YY_PARSE(T) static T
+
+.fi
+.TP
+.B YY_CTX_LOCAL
+If this symbol is defined during compilation of a generated parser
+then global parser state will be kept in a structure of
+type 'yycontext' which can be declared as a local variable. This
+allows multiple instances of parsers to coexist and to be thread\-safe.
+The parsing function
+.IR yyparse ()
+will be declared to expect a first argument of type 'yycontext *', an
+instance of the structure holding the global state for the parser.
+This instance must be allocated and initialised to zero by the client.
+A trivial but complete example is as follows.
+.nf
+
+ #include <stdio.h>
+
+ #define YY_CTX_LOCAL
+
+ #include "the\-generated\-parser.peg.c"
+
+ int main()
+ {
+ yycontext ctx;
+ memset(&ctx, 0, sizeof(yycontext));
+ while (yyparse(&ctx));
+ return 0;
+ }
+
+.fi
+Note that if this symbol is undefined then the compiled parser will
+statically allocate its global state and will be neither reentrant nor
+thread\-safe.
+Note also that the parser yycontext structure is initialised automatically
+the first time
+.IR yyparse ()
+is called; this structure
+.B must
+therefore be properly initialised to zero before the first call to
+.IR yyparse ().
+.TP
+.B YY_CTX_MEMBERS
+If YY_CTX_LOCAL is defined (see above) then the macro YY_CTX_MEMBERS
+can be defined to expand to any additional member field declarations
+that the client would like included in the declaration of
+the 'yycontext' structure type. These additional members are
+otherwise ignored by the generated parser. The instance
+of 'yycontext' associated with the currently\-active parser is
+available within actions as the pointer variable
+.IR yy .
+.TP
+.B YY_BUFFER_SIZE
+The initial size of the text buffer, in bytes. The default is 1024
+and the buffer size is doubled whenever required to meet demand during
+parsing. An application that typically parses much longer strings
+could increase this to avoid unnecessary buffer reallocation.
+.TP
+.B YY_STACK_SIZE
+The initial size of the variable and action stacks. The default is
+128, which is doubled whenever required to meet demand during parsing.
+Applications that have deep call stacks with many local variables, or
+that perform many actions after a single successful match, could increase
+this to avoid unnecessary buffer reallocation.
+.TP
+.BI YY_MALLOC( YY , \ SIZE )
+The memory allocator for all parser\-related storage. The parameters
+are the current yycontext structure and the number of bytes to
+allocate. The default definition is:
+.RI malloc( SIZE )
+.TP
+.BI YY_REALLOC( YY , \ PTR , \ SIZE )
+The memory reallocator for dynamically\-grown storage (such as text
+buffers and variable stacks). The parameters are the current
+yycontext structure, the previously\-allocated storage, and the number
+of bytes to which that storage should be grown. The default definition is:
+.RI realloc( PTR , \ SIZE )
+.TP
+.BI YY_FREE( YY , \ PTR )
+The memory deallocator. The parameters are the current yycontext
+structure and the storage to deallocate. The default definition is:
+.RI free( PTR )
+.TP
+.B YYRELEASE
+The name of the function that releases all resources held by a
+yycontext structure. The default value is 'yyrelease'.
+.PP
+The following variables can be referred to within actions.
+.TP
+.B char *yybuf
+This variable points to the parser's input buffer used to store input
+text that has not yet been matched.
+.TP
+.B int yypos
+This is the offset (in yybuf) of the next character to be matched and
+consumed.
+.TP
+.B char *yytext
+The most recent matched text delimited by '<' and '>' is stored in this variable.
+.TP
+.B int yyleng
+This variable indicates the number of characters in 'yytext'.
+.TP
+.B yycontext *yy
+This variable points to the instance of 'yycontext' associated with
+the currently\-active parser.
+.PP
+Programs that wish to release all the resources associated with a
+parser can use the following function.
+.TP
+.BI yyrelease(yycontext * yy )
+Returns all parser\-allocated storage associated with
+.I yy
+to the system. The storage will be reallocated on the next call to
+.IR yyparse ().
+.PP
+Note that the storage for the yycontext structure itself is never
+allocated or reclaimed implicitly. The application must allocate
+these structures in automatic storage, or use
+.IR calloc ()
+and
+.IR free ()
+to manage them explicitly. The example in the following section
+demonstrates one approach to resource management.
+.SH EXAMPLE: EXTENDING THE PARSER'S CONTEXT
+The
+.I yy
+variable passed to actions contains the state of the parser plus any
+additional fields defined by YY_CTX_MEMBERS.  These fields can be
+used to store application\-specific information that is global to a
+particular call of
+.IR yyparse ().
+A trivial but complete
+.I minipeg
+example follows in which the yycontext
+structure is extended with a
+.I count
+of the number of newline characters
+seen in the input so far (the grammar otherwise consumes and ignores
+the entire input). The caller of
+.IR yyparse ()
+uses
+.I count
+to print the number of lines of input that were read.
+
+.nf
+
+ %{
+ #define YY_CTX_LOCAL 1
+ #define YY_CTX_MEMBERS \\
+ int count;
+ %}
+
+ Char = ('\\n' | '\\r\\n' | '\\r') { yy\->count++ }
+ | .
+
+ %%
+
+ #include <stdio.h>
+ #include <string.h>
+
+ int main()
+ {
+ /* create a local parser context in automatic storage */
+ yycontext yy;
+ /* the context *must* be initialised to zero before first use*/
+ memset(&yy, 0, sizeof(yy));
+
+ while (yyparse(&yy))
+ ;
+ printf("%d newlines\\n", yy.count);
+
+ /* release all resources associated with the context */
+ yyrelease(&yy);
+
+ return 0;
+ }
+
+.fi
+.SH DIAGNOSTICS
+.I minipeg
+warns about the following conditions while converting a grammar into a parser.
+.TP
+.B syntax error
+The input grammar was malformed in some way. The error message will
+include the text about to be matched (often backed up a huge amount
+from the actual location of the error) and the line number of the most
+recently considered character (which is often the real location of the
+problem).
+.TP
+.B rule 'foo' used but not defined
+The grammar referred to a rule named 'foo' but no definition for it
+was given. Attempting to use the generated parser will likely result
+in errors from the linker due to undefined symbols associated with the
+missing rule.
+.TP
+.B rule 'foo' defined but not used
+The grammar defined a rule named 'foo' and then ignored it. The code
+associated with the rule is included in the generated parser which
+will in all other respects be healthy.
+.TP
+.B possible infinite left recursion in rule 'foo'
+There exists at least one path through the grammar that leads from the
+rule 'foo' back to (a recursive invocation of) the same rule without
+consuming any input.
+.PP
+Left recursion, especially that found in standards documents, is
+often 'direct' and implies trivial repetition.
+.nf
+
+ # (6.7.6)
+ direct\-abstract\-declarator =
+ LPAREN abstract\-declarator RPAREN
+ | direct\-abstract\-declarator? LBRACKET assign\-expr? RBRACKET
+ | direct\-abstract\-declarator? LBRACKET STAR RBRACKET
+ | direct\-abstract\-declarator? LPAREN param\-type\-list? RPAREN
+
+.fi
+The recursion can easily be eliminated by converting the parts of the
+pattern following the recursion into a repeatable suffix.
+.nf
+
+ # (6.7.6)
+ direct\-abstract\-declarator =
+ direct\-abstract\-declarator\-head?
+ direct\-abstract\-declarator\-tail*
+
+ direct\-abstract\-declarator\-head =
+ LPAREN abstract\-declarator RPAREN
+
+ direct\-abstract\-declarator\-tail =
+ LBRACKET assign\-expr? RBRACKET
+ | LBRACKET STAR RBRACKET
+ | LPAREN param\-type\-list? RPAREN
+
+.fi
+.SH CAVEATS
+A parser that accepts empty input will
+.I always
+succeed. Consider the following example, not atypical of a first
+attempt to write a PEG\-based parser:
+.nf
+
+ Program = Expression*
+ Expression = "whatever"
+ %%
+ int main() {
+ while (yyparse())
+ puts("success!");
+ return 0;
+ }
+
+.fi
+This program loops forever, no matter what (if any) input is provided
+on stdin. Many fixes are possible, the easiest being to insist that
+the parser always consumes some non\-empty input. Changing the first
+line to
+.nf
+
+ Program = Expression+
+
+.fi
+accomplishes this. If the parser is expected to consume the entire
+input, then explicitly requiring the end\-of\-file is also highly
+recommended:
+.nf
+
+ Program = Expression+ !.
+
+.fi
+This works because the parser will only fail to match ("!" predicate)
+any character at all ("." expression) when it attempts to read beyond
+the end of the input.
+.SH BUGS
+.PP
+The 'yy' and 'YY' prefixes cannot be changed.
+.PP
+Left recursion is detected in the input grammar but is not handled
+correctly in the generated parser.
+.PP
+Diagnostics for errors in the input grammar are obscure and not
+particularly helpful.
+.PP
+The operators
+.BR ! \ \c
+and
+.B ~
+should really be named the other way around.
+.PP
+Several commonly\-used
+.IR lex (1)
+features (yywrap(), yyin, etc.) are completely absent.
+.PP
+The generated parser does not contain '#line' directives to direct C
+compiler errors back to the grammar description when appropriate.
+.SH SEE ALSO
+D. Val Schorre,
+.I META II, a syntax\-oriented compiler writing language,
+19th ACM National Conference, 1964, pp.\ 41.301\-\-41.311. Describes a
+self\-implementing parser generator for analytic grammars with no
+backtracking.
+.PP
+Alexander Birman,
+.I The TMG Recognition Schema,
+Ph.D. dissertation, Princeton, 1970. A mathematical treatment of the
+power and complexity of recursive\-descent parsing with backtracking.
+.PP
+Bryan Ford,
+.I Parsing Expression Grammars: A Recognition\-Based Syntactic Foundation,
+ACM SIGPLAN Symposium on Principles of Programming Languages, 2004.
+Defines PEGs and analyses them in relation to context\-free and regular
+grammars. Introduces the syntax adopted in
+.IR peg .
+.PP
+The standard Unix utilities
+.IR lex (1)
+and
+.IR yacc (1)
+which influenced the syntax and features of
+.IR minipeg .
+.PP
+The source code for
+.I minipeg
+whose grammar parser is written using itself.
+.PP
+The latest version of this software and documentation:
+.nf
+
+ https://github.com/andrewchambers/minipeg
+
+.fi
+.SH AUTHOR
+.IR minipeg
+and this manual were originally written by Ian Piumarta
+under the project name peg/leg.
+.IR minipeg
+is a fork of peg/leg by Andrew Chambers.
+.PP
+Please send bug reports and suggestions for improvements to the author
+at the project address.
--- /dev/null
+++ b/docs/simple.css
@@ -1,0 +1,14 @@
+pre, p, h3 { margin: 0.9em 0px 0.9em; }
+article, .container { width: 800px; margin: auto; padding: 0 20px 0 20px; } /* centred fixed-width column with side gutters */
+h1 { font-family: arial, sans-serif; }
+h2 { text-align: center; margin-bottom: 1em; }
+pre, code { line-height: normal; overflow: auto; font-size: 0.8em; }
+#logo { vertical-align: middle; margin-right: 16px; }
+body { line-height: 1.3; }
+html { font-size: 20px; font-family: "Charter","Georgia",'Times New Roman',serif; }
+.bnf { background-color: white; padding-left: 2em; }
+
+@media only screen and (max-device-width: 480px) { /* small screens: fluid column, smaller type */
+article, .container { width: 100%; box-sizing: border-box; } /* keep the side gutters inside the 100% width */
+html { font-size: 13px; }
+}
--- a/minipeg.1
+++ /dev/null
@@ -1,915 +1,0 @@
-.TH MINIPEG 1
-.SH NAME
-minipeg \- parser generator
-.SH SYNOPSIS
-.B minipeg
-.B [\-hvVP \-ooutput]
-.I [filename ...]
-.SH DESCRIPTION
-.I minipeg
-is a tool for generating recursive\-descent parsers: programs that
-perform pattern matching on text. They process a Parsing Expression
-Grammar (PEG) [Ford 2004] to produce a program that recognises legal
-sentences of that grammar.
-.I minipeg
-processes PEGs written with syntax and conventions
-that are intended to make it an attractive replacement for parsers
-built with
-.IR lex (1)
-and
-.IR yacc (1).
-Unlike
-.I lex
-and
-.IR yacc ,
-.I minipeg
-support unlimited backtracking, provide ordered choice as a means for
-disambiguation, and can combine scanning (lexical analysis) and
-parsing (syntactic analysis) into a single activity.
-.PP
-.I minipeg
-reads the specified
-.IR filename s,
-or standard input if no
-.IR filename s
-are given, for a grammar describing the parser to generate.
-.I minipeg
-then generates a C source file that defines a function
-.IR yyparse().
-This C source file can be included in, or compiled and then linked
-with, a client program. Each time the client program calls
-.IR yyparse ()
-the parser consumes input text according to the parsing rules,
-starting from the first rule in the grammar.
-.IR yyparse ()
-returns non\-zero if the input could be parsed according to the
-grammar; it returns zero if the input could not be parsed.
-.PP
-The prefix 'yy' or 'YY' is prepended to all externally\-visible symbols
-in the generated parser. This is intended to reduce the risk of
-namespace pollution in client programs. (The choice of 'yy' is
-historical; see
-.IR lex (1)
-and
-.IR yacc (1),
-for example.)
-.SH OPTIONS
-.I minipeg
-provide the following options:
-.TP
-.B \-h
-prints a summary of available options and then exits.
-.TP
-.B \-ooutput
-writes the generated parser to the file
-.B output
-instead of the standard output.
-.TP
-.B \-P
-suppresses #line directives in the output.
-.TP
-.B \-v
-writes verbose information to standard error while working.
-.TP
-.B \-V
-writes version information to standard error then exits.
-.SH EXAMPLE: A CALCULATOR
-Here we show a simple desk calculator supporting the four common arithmetic
-operators and named variables. The intermediate results of arithmetic
-evaluation will be accumulated on an implicit stack by returning them
-as semantic values from sub\-rules.
-.nf
-
- %{
- #include <stdio.h> /* printf() */
- #include <stdlib.h> /* atoi() */
- int vars[26];
- %}
-
- Stmt = \- e:Expr EOL { printf("%d\\n", e); }
- | ( !EOL . )* EOL { printf("error\\n"); }
-
- Expr = i:ID ASSIGN s:Sum { $$ = vars[i] = s; }
- | s:Sum { $$ = s; }
-
- Sum = l:Product
- ( PLUS r:Product { l += r; }
- | MINUS r:Product { l \-= r; }
- )* { $$ = l; }
-
- Product = l:Value
- ( TIMES r:Value { l *= r; }
- | DIVIDE r:Value { l /= r; }
- )* { $$ = l; }
-
- Value = i:NUMBER { $$ = atoi(yytext); }
- | i:ID !ASSIGN { $$ = vars[i]; }
- | OPEN i:Expr CLOSE { $$ = i; }
-
- NUMBER = < [0\-9]+ > \- { $$ = atoi(yytext); }
- ID = < [a\-z] > \- { $$ = yytext[0] \- 'a'; }
- ASSIGN = '=' \-
- PLUS = '+' \-
- MINUS = '\-' \-
- TIMES = '*' \-
- DIVIDE = '/' \-
- OPEN = '(' \-
- CLOSE = ')' \-
-
- \- = [ \\t]*
- EOL = '\\n' | '\\r\\n' | '\\r' | ';'
-
- %%
-
- int main()
- {
- while (yyparse())
- ;
- return 0;
- }
-.fi
-.PP
-If the above grammar is placed in the file
-.BR calc.peg ,
-running the command
-.nf
-
- $ minipeg \-o calc.c calc.peg
-
-.fi
-will save the corresponding parser in the file
-.BR calc.c .
-The program can then be compiled with a C compiler and run
-.nf
-
- $ cc \-o calc calc.c
- $ ./calc
- a=5
- 5
- a+5
- 10
-.fi
-
-.SH MINIPEG GRAMMARS
-A grammar consists of a set of named rules.
-.nf
-
- name = pattern
-
-.fi
-The
-.B pattern
-contains one or more of the following elements.
-.TP
-.B name
-The element stands for the entire pattern in the rule with the given
-.BR name .
-.TP
-.BR \(dq characters \(dq
-A character or string enclosed in double quotes is matched literally.
-The ANSI C escape sequences are recognised within the
-.IR characters .
-.TP
-.BR ' characters '
-A character or string enclosed in single quotes is matched literally, as above.
-.TP
-.BR [ characters ]
-A set of characters enclosed in square brackets matches any single
-character from the set, with escape characters recognised as above.
-If the set begins with an uparrow (^) then the set is negated (the
-element matches any character
-.I not
-in the set). Any pair of characters separated with a dash (\-)
-represents the range of characters from the first to the second,
-inclusive. A single alphabetic character or underscore is matched by
-the following set.
-.nf
-
- [a\-zA\-Z_]
-
-.fi
-Similarly, the following matches any single non\-digit character.
-.nf
-
- [^0\-9]
-
-.fi
-.TP
-.B .
-A dot matches any character. Note that the only time this fails is at
-the end of file, where there is no character to match.
-.TP
-.BR ( \ pattern\ )
-Parentheses are used for grouping (modifying the precedence of the
-operators described below).
-.TP
-.BR { \ action\ }
-Curly braces surround actions. The action is arbitrary C source code
-to be executed at the end of matching. Any braces within the action
-must be properly nested. Any input text that was matched before the
-action and delimited by angle brackets (see below) is made available
-within the action as the contents of the character array
-.IR yytext .
-The length of (number of characters in)
-.I yytext
-is available in the variable
-.IR yyleng .
-(These variable names are historical; see
-.IR lex (1).)
-.TP
-.IB @{\ action\ }
-Actions prefixed with an 'at' symbol will be performed during parsing,
-at the time they are encountered while matching the input text with a
-rule.
-Because of back-tracking in the PEG parsing algorithm, actions
-prefixed with '@' might be performed multiple times for the same input
-text.
-(The usual behviour of actions is that they are saved up until
-matching is complete, and then those that are part of the
-final derivation are performed in left-to-right order.)
-The variable
-.I yytext
-is available within these actions.
-.TP
-.IB exp \ ~ \ {\ action\ }
-A postfix operator
-.BI ~ {\ action\ }
-can be placed after any expression and will behave like a normal
-action (arbitrary C code) except that it is invoked only when
-.I exp
-fails. It binds less tightly than any other operator except alternation and sequencing, and
-is intended to make error handling and recovery code easier to write.
-Note that
-.I yytext
-and
-.I yyleng
-are not available inside these actions, but the pointer variable
-.I yy
-is available to give the code access to any user\-defined members
-of the parser state (see "CUSTOMISING THE PARSER" below).
-Note also that
-.I exp
-is always a single expression; to invoke an error action for any
-failure within a sequence, parentheses must be used to group the
-sequence into a single expression.
-.nf
-
- rule = e1 e2 e3 ~{ error("e[12] ok; e3 has failed"); }
- | ...
-
- rule = (e1 e2 e3) ~{ error("one of e[123] has failed"); }
- | ...
-.fi
-.TP
-.B <
-An opening angle bracket always matches (consuming no input) and
-causes the parser to begin accumulating matched text. This text will
-be made available to actions in the variable
-.IR yytext .
-.TP
-.B >
-A closing angle bracket always matches (consuming no input) and causes
-the parser to stop accumulating text for
-.IR yytext .
-.PP
-The above
-.IR element s
-can be made optional and/or repeatable with the following suffixes:
-.TP
-.RB element\ ?
-The element is optional. If present on the input, it is consumed and
-the match succeeds. If not present on the input, no text is consumed
-and the match succeeds anyway.
-.TP
-.RB element\ +
-The element is repeatable. If present on the input, one or more
-occurrences of
-.I element
-are consumed and the match succeeds. If no occurrences of
-.I element
-are present on the input, the match fails.
-.TP
-.RB element\ *
-The element is optional and repeatable. If present on the input, one or more
-occurrences of
-.I element
-are consumed and the match succeeds. If no occurrences of
-.I element
-are present on the input, the match succeeds anyway.
-.PP
-The above elements and suffixes can be converted into predicates (that
-match arbitrary input text and subsequently succeed or fail
-.I without
-consuming that input) with the following prefixes:
-.TP
-.BR & \ element
-The predicate succeeds only if
-.I element
-can be matched. Input text scanned while matching
-.I element
-is not consumed from the input and remains available for subsequent
-matching.
-.TP
-.BR ! \ element
-The predicate succeeds only if
-.I element
-cannot be matched. Input text scanned while matching
-.I element
-is not consumed from the input and remains available for subsequent
-matching. A popular idiom is
-.nf
-
- !.
-
-.fi
-which matches the end of file, after the last character of the input
-has already been consumed.
-.PP
-A special form of the '&' predicate is provided:
-.TP
-.BR & {\ expression\ }
-In this predicate the simple C
-.I expression
-.RB ( not
-statement) is evaluated immediately when the parser reaches the
-predicate. If the
-.I expression
-yields non\-zero (true) the 'match' succeeds and the parser continues
-with the next element in the pattern. If the
-.I expression
-yields zero (false) the 'match' fails and the parser backs up to look
-for an alternative parse of the input.
-.PP
-Several elements (with or without prefixes and suffixes) can be
-combined into a
-.I sequence
-by writing them one after the other. The entire sequence matches only
-if each individual element within it matches, from left to right.
-.PP
-Sequences can be separated into disjoint alternatives by the
-alternation operator '|'.
-.TP
-.RB sequence\-1\ | \ sequence\-2\ | \ ...\ | \ sequence\-N
-Each sequence is tried in turn until one of them matches, at which
-time matching for the overall pattern succeeds. If none of the
-sequences matches then the match of the overall pattern fails.
-.PP
-The following elements can appear in addition to rules.
-.TP
-.BI %{\ text... \ %}
-A declaration section can appear anywhere that a rule definition is
-expected. The
-.I text
-between the delimiters '%{' and '%}' is copied verbatim to the
-generated C parser code
-.I before
-the code that implements the parser itself.
-.PP
-The pound sign (#) introduces a comment (discarded) that
-continues until the end of the line.
-.TP
-.BI %% \ text...
-A double percent '%%' terminates the rules (and declarations) section of
-the grammar. All
-.I text
-following '%%' is copied verbatim to the generated C parser code
-.I after
-the parser implementation code.
-.PP
-Some notes regarding rules and and patterns follow.
-.PP
-.B rule\-name
-Hyphens can appear as letters in the names of rules. Each hyphen is
-converted into an underscore in the generated C source code. A
-single hyphen '\-' is a legal rule name.
-.PP
-Within actions you can access and manipulate named values.
-.TP
-.BI $$\ = \ value
-A sub\-rule can return a semantic
-.I value
-from an action by assigning it to the pseudo\-variable '$$'. All
-semantic values must have the same type (which defaults to 'int').
-This type can be changed by defining YYSTYPE in a declaration section.
-.TP
-.IB identifier : name
-The semantic value returned (by assigning to '$$') from the sub\-rule
-.I name
-is associated with the
-.I identifier
-and can be referred to in subsequent actions.
-.SH MINIPEG GRAMMAR FOR MINIPEG GRAMMARS
-The grammar for
-.I minipeg
-grammars is shown below. This will both illustrate and formalise the
-above description.
-.nf
-
- grammar = \-
- ( declaration | definition )+
- trailer? end\-of\-file
-
- declaration = '%{' < ( !'%}' . )* > RPERCENT
-
- trailer = '%%' < .* >
-
- definition = identifier EQUAL expression
-
- expression = sequence ( BAR sequence )*
-
- sequence = error+
-
- error = prefix ( TILDE action )?
-
- prefix = AND action
- | ( AND | NOT )? suffix
-
- suffix = primary ( QUERY | STAR | PLUS )?
-
- primary = identifier COLON identifier !EQUAL
- | identifier !EQUAL
- | OPEN expression CLOSE
- | literal
- | class
- | DOT
- | action
- | BEGIN
- | END
-
- identifier = < [\-a\-zA\-Z_][\-a\-zA\-Z_0\-9]* > \-
-
- literal = ['] < ( !['] char )* > ['] \-
- | ["] < ( !["] char )* > ["] \-
-
- class = '[' < ( !']' range )* > ']' \-
-
- range = char '\-' char | char
-
- char = '\\\\' [abefnrtv'"\\[\\]\\\\]
- | '\\\\' [0\-3][0\-7][0\-7]
- | '\\\\' [0\-7][0\-7]?
- | !'\\\\' .
-
- action = '{' < braces* > '}' \-
-
- braces = '{' braces* '}'
- | !'}' .
-
- EQUAL = '=' \-
- COLON = ':' \-
- BAR = '|' \-
- AND = '&' \-
- NOT = '!' \-
- QUERY = '?' \-
- STAR = '*' \-
- PLUS = '+' \-
- OPEN = '(' \-
- CLOSE = ')' \-
- DOT = '.' \-
- BEGIN = '<' \-
- END = '>' \-
- TILDE = '~' \-
- RPERCENT = '%}' \-
-
- \- = ( space | comment )*
- space = ' ' | '\\t' | end\-of\-line
- comment = '#' ( !end\-of\-line . )* end\-of\-line
- end\-of\-line = '\\r\\n' | '\\n' | '\\r'
- end\-of\-file = !.
-
-.fi
-.SH CUSTOMISING THE PARSER
-The following symbols can be redefined in declaration sections to
-modify the generated parser code.
-.TP
-.B YYSTYPE
-The semantic value type. The pseudo\-variable '$$' and the
-identifiers 'bound' to rule results with the colon operator ':' should
-all be considered as being declared to have this type. The default
-value is 'int'.
-.TP
-.B YYPARSE
-The name of the main entry point to the parser. The default value
-is 'yyparse'.
-.TP
-.B YYPARSEFROM
-The name of an alternative entry point to the parser. This function
-expects one argument: the function corresponding to the rule from
-which the search for a match should begin. The default
-is 'yyparsefrom'. Note that yyparse() is defined as
-.nf
-
- int yyparse() { return yyparsefrom(yy_foo); }
-
-.fi
-where 'foo' is the name of the first rule in the grammar.
-.TP
-.BI YY_INPUT( buf , \ result , \ max_size )
-This macro is invoked by the parser to obtain more input text.
-.I buf
-points to an area of memory that can hold at most
-.I max_size
-characters. The macro should copy input text to
-.I buf
-and then assign the integer variable
-.I result
-to indicate the number of characters copied. If no more input is available,
-the macro should assign 0 to
-.IR result .
-By default, the YY_INPUT macro is defined as follows.
-.nf
-
- #define YY_INPUT(buf, result, max_size) \\
- { \\
- int yyc= getchar(); \\
- result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \\
- }
-
-.fi
-Note that if YY_CTX_LOCAL is defined (see below) then an additional
-first argument, containing the parser context, is passed to YY_INPUT.
-.TP
-.B YY_DEBUG
-If this symbols is defined then additional code will be included in
-the parser that prints vast quantities of arcane information to the
-standard error while the parser is running.
-.TP
-.B YY_BEGIN
-This macro is invoked to mark the start of input text that will be
-made available in actions as 'yytext'. This corresponds to
-occurrences of '<' in the grammar. These are converted into
-predicates that are expected to succeed. The default definition
-.nf
-
- #define YY_BEGIN (yybegin= yypos, 1)
-
-.fi
-therefore saves the current input position and returns 1 ('true') as
-the result of the predicate.
-.TP
-.B YY_END
-This macros corresponds to '>' in the grammar. Again, it is a
-predicate so the default definition saves the input position
-before 'succeeding'.
-.nf
-
- #define YY_END (yyend= yypos, 1)
-
-.fi
-.TP
-.BI YY_PARSE( T )
-This macro declares the parser entry points (yyparse and yyparsefrom)
-to be of type
-.IR T .
-The default definition
-.nf
-
- #define YY_PARSE(T) T
-
-.fi
-leaves yyparse() and yyparsefrom() with global visibility. If they
-should not be externally visible in other source files, this macro can
-be redefined to declare them 'static'.
-.nf
-
- #define YY_PARSE(T) static T
-
-.fi
-.TP
-.B YY_CTX_LOCAL
-If this symbol is defined during compilation of a generated parser
-then global parser state will be kept in a structure of
-type 'yycontext' which can be declared as a local variable. This
-allows multiple instances of parsers to coexist and to be thread\-safe.
-The parsing function
-.IR yyparse ()
-will be declared to expect a first argument of type 'yycontext *', an
-instance of the structure holding the global state for the parser.
-This instance must be allocated and initialised to zero by the client.
-A trivial but complete example is as follows.
-.nf
-
- #include <stdio.h>
-
- #define YY_CTX_LOCAL
-
- #include "the\-generated\-parser.peg.c"
-
- int main()
- {
- yycontext ctx;
- memset(&ctx, 0, sizeof(yycontext));
- while (yyparse(&ctx));
- return 0;
- }
-
-.fi
-Note that if this symbol is undefined then the compiled parser will
-statically allocate its global state and will be neither reentrant nor
-thread\-safe.
-Note also that the parser yycontext structure is initialised automatically
-the first time
-.IR yyparse ()
-is called; this structure
-.B must
-therefore be properly initialised to zero before the first call to
-.IR yyparse ().
-.TP
-.B YY_CTX_MEMBERS
-If YY_CTX_LOCAL is defined (see above) then the macro YY_CTX_MEMBERS
-can be defined to expand to any additional member field declarations
-that the client would like included in the declaration of
-the 'yycontext' structure type. These additional members are
-otherwise ignored by the generated parser. The instance
-of 'yycontext' associated with the currently\-active parser is
-available within actions as the pointer variable
-.IR yy .
-.TP
-.B YY_BUFFER_SIZE
-The initial size of the text buffer, in bytes. The default is 1024
-and the buffer size is doubled whenever required to meet demand during
-parsing. An application that typically parses much longer strings
-could increase this to avoid unnecessary buffer reallocation.
-.TP
-.B YY_STACK_SIZE
-The initial size of the variable and action stacks. The default is
-128, which is doubled whenever required to meet demand during parsing.
-Applications that have deep call stacks with many local variables, or
-that perform many actions after a single successful match, could increase
-this to avoid unnecessary buffer reallocation.
-.TP
-.BI YY_MALLOC( YY , \ SIZE )
-The memory allocator for all parser\-related storage. The parameters
-are the current yycontext structure and the number of bytes to
-allocate. The default definition is:
-.RI malloc( SIZE )
-.TP
-.BI YY_REALLOC( YY , \ PTR , \ SIZE )
-The memory reallocator for dynamically\-grown storage (such as text
-buffers and variable stacks). The parameters are the current
-yycontext structure, the previously\-allocated storage, and the number
-of bytes to which that storage should be grown. The default definition is:
-.RI realloc( PTR , \ SIZE )
-.TP
-.BI YY_FREE( YY , \ PTR )
-The memory deallocator. The parameters are the current yycontext
-structure and the storage to deallocate. The default definition is:
-.RI free( PTR )
-.TP
-.B YYRELEASE
-The name of the function that releases all resources held by a
-yycontext structure. The default value is 'yyrelease'.
-.PP
-The following variables can be referred to within actions.
-.TP
-.B char *yybuf
-This variable points to the parser's input buffer used to store input
-text that has not yet been matched.
-.TP
-.B int yypos
-This is the offset (in yybuf) of the next character to be matched and
-consumed.
-.TP
-.B char *yytext
-The most recent matched text delimited by '<' and '>' is stored in this variable.
-.TP
-.B int yyleng
-This variable indicates the number of characters in 'yytext'.
-.TP
-.B yycontext *yy
-This variable points to the instance of 'yycontext' associated with
-the currently\-active parser.
-.PP
-Programs that wish to release all the resources associated with a
-parser can use the following function.
-.TP
-.BI yyrelease(yycontext * yy )
-Returns all parser\-allocated storage associated with
-.I yy
-to the system. The storage will be reallocated on the next call to
-.IR yyparse ().
-.PP
-Note that the storage for the yycontext structure itself is never
-allocated or reclaimed implicitly. The application must allocate
-these structures in automatic storage, or use
-.IR calloc ()
-and
-.IR free ()
-to manage them explicitly. The example in the following section
-demonstrates one approach to resource management.
-.SH EXAMPLE: EXTENDING THE PARSER'S CONTEXT
-The
-.I yy
-variable passed to actions contains the state of the parser plus any
-additional fields defined by YY_CTX_MEMBERS. Theses fields can be
-used to store application\-specific information that is global to a
-particular call of
-.IR yyparse ().
-A trivial but complete
-.I leg
-example follows in which the yycontext
-structure is extended with a
-.I count
-of the number of newline characters
-seen in the input so far (the grammar otherwise consumes and ignores
-the entire input). The caller of
-.IR yyparse ()
-uses
-.I count
-to print the number of lines of input that were read.
-
-.nf
-
- %{
- #define YY_CTX_LOCAL 1
- #define YY_CTX_MEMBERS \\
- int count;
- %}
-
- Char = ('\\n' | '\\r\\n' | '\\r') { yy\->count++ }
- | .
-
- %%
-
- #include <stdio.h>
- #include <string.h>
-
- int main()
- {
- /* create a local parser context in automatic storage */
- yycontext yy;
- /* the context *must* be initialised to zero before first use*/
- memset(&yy, 0, sizeof(yy));
-
- while (yyparse(&yy))
- ;
- printf("%d newlines\\n", yy.count);
-
- /* release all resources associated with the context */
- yyrelease(&yy);
-
- return 0;
- }
-
-.fi
-.SH DIAGNOSTICS
-.I minipeg
-warns about the following conditions while converting a grammar into a parser.
-.TP
-.B syntax error
-The input grammar was malformed in some way. The error message will
-include the text about to be matched (often backed up a huge amount
-from the actual location of the error) and the line number of the most
-recently considered character (which is often the real location of the
-problem).
-.TP
-.B rule 'foo' used but not defined
-The grammar referred to a rule named 'foo' but no definition for it
-was given. Attempting to use the generated parser will likely result
-in errors from the linker due to undefined symbols associated with the
-missing rule.
-.TP
-.B rule 'foo' defined but not used
-The grammar defined a rule named 'foo' and then ignored it. The code
-associated with the rule is included in the generated parser which
-will in all other respects be healthy.
-.TP
-.B possible infinite left recursion in rule 'foo'
-There exists at least one path through the grammar that leads from the
-rule 'foo' back to (a recursive invocation of) the same rule without
-consuming any input.
-.PP
-Left recursion, especially that found in standards documents, is
-often 'direct' and implies trivial repetition.
-.nf
-
- # (6.7.6)
- direct\-abstract\-declarator =
- LPAREN abstract\-declarator RPAREN
- | direct\-abstract\-declarator? LBRACKET assign\-expr? RBRACKET
- | direct\-abstract\-declarator? LBRACKET STAR RBRACKET
- | direct\-abstract\-declarator? LPAREN param\-type\-list? RPAREN
-
-.fi
-The recursion can easily be eliminated by converting the parts of the
-pattern following the recursion into a repeatable suffix.
-.nf
-
- # (6.7.6)
- direct\-abstract\-declarator =
- direct\-abstract\-declarator\-head?
- direct\-abstract\-declarator\-tail*
-
- direct\-abstract\-declarator\-head =
- LPAREN abstract\-declarator RPAREN
-
- direct\-abstract\-declarator\-tail =
- LBRACKET assign\-expr? RBRACKET
- | LBRACKET STAR RBRACKET
- | LPAREN param\-type\-list? RPAREN
-
-.fi
-.SH CAVEATS
-A parser that accepts empty input will
-.I always
-succeed. Consider the following example, not atypical of a first
-attempt to write a PEG\-based parser:
-.nf
-
- Program = Expression*
- Expression = "whatever"
- %%
- int main() {
- while (yyparse())
- puts("success!");
- return 0;
- }
-
-.fi
-This program loops forever, no matter what (if any) input is provided
-on stdin. Many fixes are possible, the easiest being to insist that
-the parser always consumes some non\-empty input. Changing the first
-line to
-.nf
-
- Program = Expression+
-
-.fi
-accomplishes this. If the parser is expected to consume the entire
-input, then explicitly requiring the end\-of\-file is also highly
-recommended:
-.nf
-
- Program = Expression+ !.
-
-.fi
-This works because the parser will only fail to match ("!" predicate)
-any character at all ("." expression) when it attempts to read beyond
-the end of the input.
-.SH BUGS
-.PP
-The 'yy' and 'YY' prefixes cannot be changed.
-.PP
-Left recursion is detected in the input grammar but is not handled
-correctly in the generated parser.
-.PP
-Diagnostics for errors in the input grammar are obscure and not
-particularly helpful.
-.PP
-The operators
-.BR ! \ \c
-and
-.B ~
-should really be named the other way around.
-.PP
-Several commonly\-used
-.IR lex (1)
-features (yywrap(), yyin, etc.) are completely absent.
-.PP
-The generated parser does not contain '#line' directives to direct C
-compiler errors back to the grammar description when appropriate.
-.SH SEE ALSO
-D. Val Schorre,
-.I META II, a syntax\-oriented compiler writing language,
-19th ACM National Conference, 1964, pp.\ 41.301\-\-41.311. Describes a
-self\-implementing parser generator for analytic grammars with no
-backtracking.
-.PP
-Alexander Birman,
-.I The TMG Recognition Schema,
-Ph.D. dissertation, Princeton, 1970. A mathematical treatment of the
-power and complexity of recursive\-descent parsing with backtracking.
-.PP
-Bryan Ford,
-.I Parsing Expression Grammars: A Recognition\-Based Syntactic Foundation,
-ACM SIGPLAN Symposium on Principles of Programming Languages, 2004.
-Defines PEGs and analyses them in relation to context\-free and regular
-grammars. Introduces the syntax adopted in
-.IR peg .
-.PP
-The standard Unix utilities
-.IR lex (1)
-and
-.IR yacc (1)
-which influenced the syntax and features of
-.IR minipeg .
-.PP
-The source code for
-.I minipeg
-whose grammar parsers are written using themselves.
-.PP
-The latest version of this software and documentation:
-.nf
-
- https://github.com/andrewchambers/minipeg
-
-.fi
-.SH AUTHOR
-.IR minipeg
-and this manual were originally written by Ian Piumarta
-under the project name peg/leg.
-.IR minipeg
-is a fork of peg/leg by Andrew Chambers.
-.PP
-Please send bug reports and suggestions for improvements to the author
-at the project address.