.\"	$Id: mdoc.3,v 1.41 2010/05/30 22:56:02 kristaps Exp $
 | 
						|
.\"
 | 
						|
.\" Copyright (c) 2009-2010 Kristaps Dzonsons <kristaps@bsd.lv>
 | 
						|
.\"
 | 
						|
.\" Permission to use, copy, modify, and distribute this software for any
 | 
						|
.\" purpose with or without fee is hereby granted, provided that the above
 | 
						|
.\" copyright notice and this permission notice appear in all copies.
 | 
						|
.\"
 | 
						|
.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 | 
						|
.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 | 
						|
.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 | 
						|
.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 | 
						|
.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 | 
						|
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 | 
						|
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 | 
						|
.\"
 | 
						|
.Dd $Mdocdate: May 30 2010 $
 | 
						|
.Dt MDOC 3
 | 
						|
.Os
 | 
						|
.Sh NAME
 | 
						|
.Nm mdoc ,
 | 
						|
.Nm mdoc_alloc ,
 | 
						|
.Nm mdoc_endparse ,
 | 
						|
.Nm mdoc_free ,
 | 
						|
.Nm mdoc_meta ,
 | 
						|
.Nm mdoc_node ,
 | 
						|
.Nm mdoc_parseln ,
 | 
						|
.Nm mdoc_reset
 | 
						|
.Nd mdoc macro compiler library
 | 
						|
.Sh SYNOPSIS
 | 
						|
.In mandoc.h
 | 
						|
.In mdoc.h
 | 
						|
.Vt extern const char * const * mdoc_macronames;
 | 
						|
.Vt extern const char * const * mdoc_argnames;
 | 
						|
.Ft "struct mdoc *"
 | 
						|
.Fn mdoc_alloc "void *data" "int pflags" "mandocmsg msgs"
 | 
						|
.Ft int
 | 
						|
.Fn mdoc_endparse "struct mdoc *mdoc"
 | 
						|
.Ft void
 | 
						|
.Fn mdoc_free "struct mdoc *mdoc"
 | 
						|
.Ft "const struct mdoc_meta *"
 | 
						|
.Fn mdoc_meta "const struct mdoc *mdoc"
 | 
						|
.Ft "const struct mdoc_node *"
 | 
						|
.Fn mdoc_node "const struct mdoc *mdoc"
 | 
						|
.Ft int
 | 
						|
.Fn mdoc_parseln "struct mdoc *mdoc" "int line" "char *buf"
 | 
						|
.Ft int
 | 
						|
.Fn mdoc_reset "struct mdoc *mdoc"
 | 
						|
.Sh DESCRIPTION
 | 
						|
The
 | 
						|
.Nm mdoc
 | 
						|
library parses lines of
 | 
						|
.Xr mdoc 7
 | 
						|
input
 | 
						|
into an abstract syntax tree (AST).
 | 
						|
.Pp
 | 
						|
In general, applications initiate a parsing sequence with
 | 
						|
.Fn mdoc_alloc ,
 | 
						|
parse each line in a document with
 | 
						|
.Fn mdoc_parseln ,
 | 
						|
close the parsing session with
 | 
						|
.Fn mdoc_endparse ,
 | 
						|
operate over the syntax tree returned by
 | 
						|
.Fn mdoc_node
 | 
						|
and
 | 
						|
.Fn mdoc_meta ,
 | 
						|
then free all allocated memory with
 | 
						|
.Fn mdoc_free .
 | 
						|
The
 | 
						|
.Fn mdoc_reset
 | 
						|
function may be used in order to reset the parser for another input
 | 
						|
sequence.
 | 
						|
See the
 | 
						|
.Sx EXAMPLES
 | 
						|
section for a simple example.
 | 
						|
.Pp
 | 
						|
This section further defines the
 | 
						|
.Sx Types ,
 | 
						|
.Sx Functions
 | 
						|
and
 | 
						|
.Sx Variables
 | 
						|
available to programmers.
 | 
						|
Following that, the
 | 
						|
.Sx Abstract Syntax Tree
 | 
						|
section documents the output tree.
 | 
						|
.Ss Types
 | 
						|
Both functions (see
 | 
						|
.Sx Functions )
 | 
						|
and variables (see
 | 
						|
.Sx Variables )
 | 
						|
may use the following types:
 | 
						|
.Bl -ohang
 | 
						|
.It Vt struct mdoc
 | 
						|
An opaque type defined in
 | 
						|
.Pa mdoc.c .
 | 
						|
Its values are only used privately within the library.
 | 
						|
.It Vt struct mdoc_node
 | 
						|
A parsed node.
 | 
						|
Defined in
 | 
						|
.Pa mdoc.h .
 | 
						|
See
 | 
						|
.Sx Abstract Syntax Tree
 | 
						|
for details.
 | 
						|
.It Vt mandocmsg
 | 
						|
A function callback type defined in
 | 
						|
.Pa mandoc.h .
 | 
						|
.El
 | 
						|
.Ss Functions
 | 
						|
Function descriptions follow:
 | 
						|
.Bl -ohang
 | 
						|
.It Fn mdoc_alloc
 | 
						|
Allocates a parsing structure.
 | 
						|
The
 | 
						|
.Fa data
 | 
						|
pointer is passed to
 | 
						|
.Fa msgs .
 | 
						|
The
 | 
						|
.Fa pflags
 | 
						|
arguments are defined in
 | 
						|
.Pa mdoc.h .
 | 
						|
Returns NULL on failure.
 | 
						|
If non-NULL, the pointer must be freed with
 | 
						|
.Fn mdoc_free .
 | 
						|
.It Fn mdoc_reset
 | 
						|
Reset the parser for another parse routine.
 | 
						|
After its use,
 | 
						|
.Fn mdoc_parseln
 | 
						|
behaves as if invoked for the first time.
 | 
						|
If it returns 0, memory could not be allocated.
 | 
						|
.It Fn mdoc_free
 | 
						|
Free all resources of a parser.
 | 
						|
The pointer is no longer valid after invocation.
 | 
						|
.It Fn mdoc_parseln
 | 
						|
Parse a NUL-terminated line of input.
 | 
						|
This line should not contain the trailing newline.
 | 
						|
Returns 0 on failure, 1 on success.
 | 
						|
The input buffer
 | 
						|
.Fa buf
 | 
						|
is modified by this function.
 | 
						|
.It Fn mdoc_endparse
 | 
						|
Signals that the parse is complete.
 | 
						|
Note that if
 | 
						|
.Fn mdoc_endparse
 | 
						|
is called subsequent to
 | 
						|
.Fn mdoc_node ,
 | 
						|
the resulting tree is incomplete.
 | 
						|
Returns 0 on failure, 1 on success.
 | 
						|
.It Fn mdoc_node
 | 
						|
Returns the first node of the parse.
 | 
						|
Note that if
 | 
						|
.Fn mdoc_parseln
 | 
						|
or
 | 
						|
.Fn mdoc_endparse
 | 
						|
return 0, the tree will be incomplete.
 | 
						|
.It Fn mdoc_meta
 | 
						|
Returns the document's parsed meta-data.
 | 
						|
If this information has not yet been supplied or
 | 
						|
.Fn mdoc_parseln
 | 
						|
or
 | 
						|
.Fn mdoc_endparse
 | 
						|
return 0, the data will be incomplete.
 | 
						|
.El
 | 
						|
.Ss Variables
 | 
						|
The following variables are also defined:
 | 
						|
.Bl -ohang
 | 
						|
.It Va mdoc_macronames
 | 
						|
An array of string-ified token names.
 | 
						|
.It Va mdoc_argnames
 | 
						|
An array of string-ified token argument names.
 | 
						|
.El
 | 
						|
.Ss Abstract Syntax Tree
 | 
						|
The
 | 
						|
.Nm
 | 
						|
functions produce an abstract syntax tree (AST) describing input in a
 | 
						|
regular form.
 | 
						|
It may be reviewed at any time with
 | 
						|
.Fn mdoc_node ;
 | 
						|
however, if called before
 | 
						|
.Fn mdoc_endparse ,
 | 
						|
or after
 | 
						|
.Fn mdoc_endparse
 | 
						|
or
 | 
						|
.Fn mdoc_parseln
 | 
						|
fail, it may be incomplete.
 | 
						|
.Pp
 | 
						|
This AST is governed by the ontological
 | 
						|
rules dictated in
 | 
						|
.Xr mdoc 7
 | 
						|
and derives its terminology accordingly.
 | 
						|
.Qq In-line
 | 
						|
elements described in
 | 
						|
.Xr mdoc 7
 | 
						|
are described simply as
 | 
						|
.Qq elements .
 | 
						|
.Pp
 | 
						|
The AST is composed of
 | 
						|
.Vt struct mdoc_node
 | 
						|
nodes with block, head, body, element, root and text types as declared
 | 
						|
by the
 | 
						|
.Va type
 | 
						|
field.
 | 
						|
Each node also provides its parse point (the
 | 
						|
.Va line ,
 | 
						|
.Va sec ,
 | 
						|
and
 | 
						|
.Va pos
 | 
						|
fields), its position in the tree (the
 | 
						|
.Va parent ,
 | 
						|
.Va child ,
 | 
						|
.Va next
 | 
						|
and
 | 
						|
.Va prev
 | 
						|
fields) and some type-specific data.
 | 
						|
.Pp
 | 
						|
The tree itself is arranged according to the following normal form,
 | 
						|
where capitalised non-terminals represent nodes.
 | 
						|
.Pp
 | 
						|
.Bl -tag -width "ELEMENTXX" -compact
 | 
						|
.It ROOT
 | 
						|
\(<- mnode+
 | 
						|
.It mnode
 | 
						|
\(<- BLOCK | ELEMENT | TEXT
 | 
						|
.It BLOCK
 | 
						|
\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
 | 
						|
.It ELEMENT
 | 
						|
\(<- TEXT*
 | 
						|
.It HEAD
 | 
						|
\(<- mnode+
 | 
						|
.It BODY
 | 
						|
\(<- mnode+
 | 
						|
.It TAIL
 | 
						|
\(<- mnode+
 | 
						|
.It TEXT
 | 
						|
\(<- [[:printable:],0x1e]*
 | 
						|
.El
 | 
						|
.Pp
 | 
						|
Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
 | 
						|
the BLOCK production: these refer to punctuation marks.
 | 
						|
Furthermore, although a TEXT node will generally have a non-zero-length
 | 
						|
string, in the specific case of
 | 
						|
.Sq \&.Bd \-literal ,
 | 
						|
an empty line will produce a zero-length string.
 | 
						|
Multiple body parts are only found in invocations of
 | 
						|
.Sq \&.Bl \-column ,
 | 
						|
where a new body introduces a new phrase.
 | 
						|
.Sh EXAMPLES
 | 
						|
The following example reads lines from stdin and parses them, operating
 | 
						|
on the finished parse tree with
 | 
						|
.Fn parsed .
 | 
						|
This example does not error-check nor free memory upon failure.
 | 
						|
.Bd -literal -offset indent
 | 
						|
struct mdoc *mdoc;
 | 
						|
const struct mdoc_node *node;
 | 
						|
char *buf;
 | 
						|
size_t alloc_len;
ssize_t len;
 | 
						|
int line;
 | 
						|
 | 
						|
line = 1;
 | 
						|
mdoc = mdoc_alloc(NULL, 0, NULL);
 | 
						|
buf = NULL;
 | 
						|
alloc_len = 0;
 | 
						|
 | 
						|
while ((len = getline(&buf, &alloc_len, stdin)) >= 0) {
 | 
						|
    if (len && buf[len - 1] == '\en')
 | 
						|
        buf[len - 1] = '\e0';
 | 
						|
    if ( ! mdoc_parseln(mdoc, line, buf))
 | 
						|
        errx(1, "mdoc_parseln");
 | 
						|
    line++;
 | 
						|
}
 | 
						|
 | 
						|
if ( ! mdoc_endparse(mdoc))
 | 
						|
    errx(1, "mdoc_endparse");
 | 
						|
if (NULL == (node = mdoc_node(mdoc)))
 | 
						|
    errx(1, "mdoc_node");
 | 
						|
 | 
						|
parsed(mdoc, node);
 | 
						|
mdoc_free(mdoc);
 | 
						|
.Ed
 | 
						|
.Pp
 | 
						|
Please see
 | 
						|
.Pa main.c
 | 
						|
in the source archive for a rigorous reference.
 | 
						|
.Sh SEE ALSO
 | 
						|
.Xr mandoc 1 ,
 | 
						|
.Xr mdoc 7
 | 
						|
.Sh AUTHORS
 | 
						|
The
 | 
						|
.Nm
 | 
						|
library was written by
 | 
						|
.An Kristaps Dzonsons Aq kristaps@bsd.lv .
 |