530 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Groff
		
	
	
	
	
	
			
		
		
	
	
			530 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Groff
		
	
	
	
	
	
.de EX
 | 
						|
.nf
 | 
						|
.ft CW
 | 
						|
..
 | 
						|
.de EE
 | 
						|
.br
 | 
						|
.fi
 | 
						|
.ft 1
 | 
						|
..
 | 
						|
awk
 | 
						|
.TH AWK 1
 | 
						|
.CT 1 files prog_other
 | 
						|
.SH NAME
 | 
						|
awk \- pattern-directed scanning and processing language
 | 
						|
.SH SYNOPSIS
 | 
						|
.B awk
 | 
						|
[
 | 
						|
.BI \-F
 | 
						|
.I fs
 | 
						|
]
 | 
						|
[
 | 
						|
.BI \-v
 | 
						|
.I var=value
 | 
						|
]
 | 
						|
[
 | 
						|
.I 'prog'
 | 
						|
|
 | 
						|
.BI \-f
 | 
						|
.I progfile
 | 
						|
]
 | 
						|
[
 | 
						|
.I file ...
 | 
						|
]
 | 
						|
.SH DESCRIPTION
 | 
						|
.I Awk
 | 
						|
scans each input
 | 
						|
.I file
 | 
						|
for lines that match any of a set of patterns specified literally in
 | 
						|
.IR prog
 | 
						|
or in one or more files
 | 
						|
specified as
 | 
						|
.B \-f
 | 
						|
.IR progfile .
 | 
						|
With each pattern
 | 
						|
there can be an associated action that will be performed
 | 
						|
when a line of a
 | 
						|
.I file
 | 
						|
matches the pattern.
 | 
						|
Each line is matched against the
 | 
						|
pattern portion of every pattern-action statement;
 | 
						|
the associated action is performed for each matched pattern.
 | 
						|
The file name 
 | 
						|
.B \-
 | 
						|
means the standard input.
 | 
						|
Any
 | 
						|
.IR file
 | 
						|
of the form
 | 
						|
.I var=value
 | 
						|
is treated as an assignment, not a filename,
 | 
						|
and is executed at the time it would have been opened if it were a filename.
 | 
						|
The option
 | 
						|
.B \-v
 | 
						|
followed by
 | 
						|
.I var=value
 | 
						|
is an assignment to be done before
 | 
						|
.I prog
 | 
						|
is executed;
 | 
						|
any number of
 | 
						|
.B \-v
 | 
						|
options may be present.
 | 
						|
The
 | 
						|
.B \-F
 | 
						|
.IR fs
 | 
						|
option defines the input field separator to be the regular expression
 | 
						|
.IR fs .
 | 
						|
.PP
 | 
						|
An input line is normally made up of fields separated by white space,
 | 
						|
or by regular expression
 | 
						|
.BR FS .
 | 
						|
The fields are denoted
 | 
						|
.BR $1 ,
 | 
						|
.BR $2 ,
 | 
						|
\&..., while
 | 
						|
.B $0
 | 
						|
refers to the entire line.
 | 
						|
If
 | 
						|
.BR FS
 | 
						|
is null, the input line is split into one field per character.
 | 
						|
.PP
 | 
						|
A pattern-action statement has the form
 | 
						|
.IP
 | 
						|
.IB pattern " { " action " }
 | 
						|
.PP
 | 
						|
A missing 
 | 
						|
.BI { " action " }
 | 
						|
means print the line;
 | 
						|
a missing pattern always matches.
 | 
						|
Pattern-action statements are separated by newlines or semicolons.
 | 
						|
.PP
 | 
						|
An action is a sequence of statements.
 | 
						|
A statement can be one of the following:
 | 
						|
.PP
 | 
						|
.EX
 | 
						|
.ta \w'\f(CWdelete array[expression]'u
 | 
						|
.RS
 | 
						|
.nf
 | 
						|
.ft CW
 | 
						|
if(\fI expression \fP)\fI statement \fP\fR[ \fPelse\fI statement \fP\fR]\fP
 | 
						|
while(\fI expression \fP)\fI statement\fP
 | 
						|
for(\fI expression \fP;\fI expression \fP;\fI expression \fP)\fI statement\fP
 | 
						|
for(\fI var \fPin\fI array \fP)\fI statement\fP
 | 
						|
do\fI statement \fPwhile(\fI expression \fP)
 | 
						|
break
 | 
						|
continue
 | 
						|
{\fR [\fP\fI statement ... \fP\fR] \fP}
 | 
						|
\fIexpression\fP	#\fR commonly\fP\fI var = expression\fP
 | 
						|
print\fR [ \fP\fIexpression-list \fP\fR] \fP\fR[ \fP>\fI expression \fP\fR]\fP
 | 
						|
printf\fI format \fP\fR[ \fP,\fI expression-list \fP\fR] \fP\fR[ \fP>\fI expression \fP\fR]\fP
 | 
						|
return\fR [ \fP\fIexpression \fP\fR]\fP
 | 
						|
next	#\fR skip remaining patterns on this input line\fP
 | 
						|
nextfile	#\fR skip rest of this file, open next, start at top\fP
 | 
						|
delete\fI array\fP[\fI expression \fP]	#\fR delete an array element\fP
 | 
						|
delete\fI array\fP	#\fR delete all elements of array\fP
 | 
						|
exit\fR [ \fP\fIexpression \fP\fR]\fP	#\fR exit immediately; status is \fP\fIexpression\fP
 | 
						|
.fi
 | 
						|
.RE
 | 
						|
.EE
 | 
						|
.DT
 | 
						|
.PP
 | 
						|
Statements are terminated by
 | 
						|
semicolons, newlines or right braces.
 | 
						|
An empty
 | 
						|
.I expression-list
 | 
						|
stands for
 | 
						|
.BR $0 .
 | 
						|
String constants are quoted \&\f(CW"\ "\fR,
 | 
						|
with the usual C escapes recognized within.
 | 
						|
Expressions take on string or numeric values as appropriate,
 | 
						|
and are built using the operators
 | 
						|
.B + \- * / % ^
 | 
						|
(exponentiation), and concatenation (indicated by white space).
 | 
						|
The operators
 | 
						|
.B
 | 
						|
! ++ \-\- += \-= *= /= %= ^= > >= < <= == != ?:
 | 
						|
are also available in expressions.
 | 
						|
Variables may be scalars, array elements
 | 
						|
(denoted
 | 
						|
.IB x  [ i ] )
 | 
						|
or fields.
 | 
						|
Variables are initialized to the null string.
 | 
						|
Array subscripts may be any string,
 | 
						|
not necessarily numeric;
 | 
						|
this allows for a form of associative memory.
 | 
						|
Multiple subscripts such as
 | 
						|
.B [i,j,k]
 | 
						|
are permitted; the constituents are concatenated,
 | 
						|
separated by the value of
 | 
						|
.BR SUBSEP .
 | 
						|
.PP
 | 
						|
The
 | 
						|
.B print
 | 
						|
statement prints its arguments on the standard output
 | 
						|
(or on a file if
 | 
						|
.BI > file
 | 
						|
or
 | 
						|
.BI >> file
 | 
						|
is present or on a pipe if
 | 
						|
.BI | cmd
 | 
						|
is present), separated by the current output field separator,
 | 
						|
and terminated by the output record separator.
 | 
						|
.I file
 | 
						|
and
 | 
						|
.I cmd
 | 
						|
may be literal names or parenthesized expressions;
 | 
						|
identical string values in different statements denote
 | 
						|
the same open file.
 | 
						|
The
 | 
						|
.B printf
 | 
						|
statement formats its expression list according to the format
 | 
						|
(see
 | 
						|
.IR printf (3)) .
 | 
						|
The built-in function
 | 
						|
.BI close( expr )
 | 
						|
closes the file or pipe
 | 
						|
.IR expr .
 | 
						|
The built-in function
 | 
						|
.BI fflush( expr )
 | 
						|
flushes any buffered output for the file or pipe
 | 
						|
.IR expr .
 | 
						|
.PP
 | 
						|
The mathematical functions
 | 
						|
.BR exp ,
 | 
						|
.BR log ,
 | 
						|
.BR sqrt ,
 | 
						|
.BR sin ,
 | 
						|
.BR cos ,
 | 
						|
and
 | 
						|
.BR atan2 
 | 
						|
are built in.
 | 
						|
Other built-in functions:
 | 
						|
.TF length
 | 
						|
.TP
 | 
						|
.B length
 | 
						|
the length of its argument
 | 
						|
taken as a string,
 | 
						|
or of
 | 
						|
.B $0
 | 
						|
if no argument.
 | 
						|
.TP
 | 
						|
.B rand
 | 
						|
random number on [0,1)
 | 
						|
.TP
 | 
						|
.B srand
 | 
						|
sets seed for
 | 
						|
.B rand
 | 
						|
and returns the previous seed.
 | 
						|
.TP
 | 
						|
.B int
 | 
						|
truncates to an integer value
 | 
						|
.TP
 | 
						|
.BI substr( s , " m" , " n\fB)
 | 
						|
the
 | 
						|
.IR n -character
 | 
						|
substring of
 | 
						|
.I s
 | 
						|
that begins at position
 | 
						|
.IR m 
 | 
						|
counted from 1.
 | 
						|
.TP
 | 
						|
.BI index( s , " t" )
 | 
						|
the position in
 | 
						|
.I s
 | 
						|
where the string
 | 
						|
.I t
 | 
						|
occurs, or 0 if it does not.
 | 
						|
.TP
 | 
						|
.BI match( s , " r" )
 | 
						|
the position in
 | 
						|
.I s
 | 
						|
where the regular expression
 | 
						|
.I r
 | 
						|
occurs, or 0 if it does not.
 | 
						|
The variables
 | 
						|
.B RSTART
 | 
						|
and
 | 
						|
.B RLENGTH
 | 
						|
are set to the position and length of the matched string.
 | 
						|
.TP
 | 
						|
.BI split( s , " a" , " fs\fB)
 | 
						|
splits the string
 | 
						|
.I s
 | 
						|
into array elements
 | 
						|
.IB a [1] ,
 | 
						|
.IB a [2] ,
 | 
						|
\&...,
 | 
						|
.IB a [ n ] ,
 | 
						|
and returns
 | 
						|
.IR n .
 | 
						|
The separation is done with the regular expression
 | 
						|
.I fs
 | 
						|
or with the field separator
 | 
						|
.B FS
 | 
						|
if
 | 
						|
.I fs
 | 
						|
is not given.
 | 
						|
An empty string as field separator splits the string
 | 
						|
into one array element per character.
 | 
						|
.TP
 | 
						|
.BI sub( r , " t" , " s\fB)
 | 
						|
substitutes
 | 
						|
.I t
 | 
						|
for the first occurrence of the regular expression
 | 
						|
.I r
 | 
						|
in the string
 | 
						|
.IR s .
 | 
						|
If
 | 
						|
.I s
 | 
						|
is not given,
 | 
						|
.B $0
 | 
						|
is used.
 | 
						|
.TP
 | 
						|
.B gsub
 | 
						|
same as
 | 
						|
.B sub
 | 
						|
except that all occurrences of the regular expression
 | 
						|
are replaced;
 | 
						|
.B sub
 | 
						|
and
 | 
						|
.B gsub
 | 
						|
return the number of replacements.
 | 
						|
.TP
 | 
						|
.BI sprintf( fmt , " expr" , " ...\fB )
 | 
						|
the string resulting from formatting
 | 
						|
.I expr ...
 | 
						|
according to the
 | 
						|
.IR printf (3)
 | 
						|
format
 | 
						|
.I fmt
 | 
						|
.TP
 | 
						|
.BI system( cmd )
 | 
						|
executes
 | 
						|
.I cmd
 | 
						|
and returns its exit status
 | 
						|
.TP
 | 
						|
.BI tolower( str )
 | 
						|
returns a copy of
 | 
						|
.I str
 | 
						|
with all upper-case characters translated to their
 | 
						|
corresponding lower-case equivalents.
 | 
						|
.TP
 | 
						|
.BI toupper( str )
 | 
						|
returns a copy of
 | 
						|
.I str
 | 
						|
with all lower-case characters translated to their
 | 
						|
corresponding upper-case equivalents.
 | 
						|
.PD
 | 
						|
.PP
 | 
						|
The ``function''
 | 
						|
.B getline
 | 
						|
sets
 | 
						|
.B $0
 | 
						|
to the next input record from the current input file;
 | 
						|
.B getline
 | 
						|
.BI < file
 | 
						|
sets
 | 
						|
.B $0
 | 
						|
to the next record from
 | 
						|
.IR file .
 | 
						|
.B getline
 | 
						|
.I x
 | 
						|
sets variable
 | 
						|
.I x
 | 
						|
instead.
 | 
						|
Finally,
 | 
						|
.IB cmd " | getline
 | 
						|
pipes the output of
 | 
						|
.I cmd
 | 
						|
into
 | 
						|
.BR getline ;
 | 
						|
each call of
 | 
						|
.B getline
 | 
						|
returns the next line of output from
 | 
						|
.IR cmd .
 | 
						|
In all cases,
 | 
						|
.B getline
 | 
						|
returns 1 for a successful input,
 | 
						|
0 for end of file, and \-1 for an error.
 | 
						|
.PP
 | 
						|
Patterns are arbitrary Boolean combinations
 | 
						|
(with
 | 
						|
.BR "! || &&" )
 | 
						|
of regular expressions and
 | 
						|
relational expressions.
 | 
						|
Regular expressions are as in
 | 
						|
.IR egrep ; 
 | 
						|
see
 | 
						|
.IR grep (1).
 | 
						|
Isolated regular expressions
 | 
						|
in a pattern apply to the entire line.
 | 
						|
Regular expressions may also occur in
 | 
						|
relational expressions, using the operators
 | 
						|
.BR ~
 | 
						|
and
 | 
						|
.BR !~ .
 | 
						|
.BI / re /
 | 
						|
is a constant regular expression;
 | 
						|
any string (constant or variable) may be used
 | 
						|
as a regular expression, except in the position of an isolated regular expression
 | 
						|
in a pattern.
 | 
						|
.PP
 | 
						|
A pattern may consist of two patterns separated by a comma;
 | 
						|
in this case, the action is performed for all lines
 | 
						|
from an occurrence of the first pattern
 | 
						|
through an occurrence of the second.
 | 
						|
.PP
 | 
						|
A relational expression is one of the following:
 | 
						|
.IP
 | 
						|
.I expression matchop regular-expression
 | 
						|
.br
 | 
						|
.I expression relop expression
 | 
						|
.br
 | 
						|
.IB expression " in " array-name
 | 
						|
.br
 | 
						|
.BI ( expr , expr,... ") in " array-name
 | 
						|
.PP
 | 
						|
where a relop is any of the six relational operators in C,
 | 
						|
and a matchop is either
 | 
						|
.B ~
 | 
						|
(matches)
 | 
						|
or
 | 
						|
.B !~
 | 
						|
(does not match).
 | 
						|
A conditional is an arithmetic expression,
 | 
						|
a relational expression,
 | 
						|
or a Boolean combination
 | 
						|
of these.
 | 
						|
.PP
 | 
						|
The special patterns
 | 
						|
.B BEGIN
 | 
						|
and
 | 
						|
.B END
 | 
						|
may be used to capture control before the first input line is read
 | 
						|
and after the last.
 | 
						|
.B BEGIN
 | 
						|
and
 | 
						|
.B END
 | 
						|
do not combine with other patterns.
 | 
						|
.PP
 | 
						|
Variable names with special meanings:
 | 
						|
.TF FILENAME
 | 
						|
.TP
 | 
						|
.B CONVFMT
 | 
						|
conversion format used when converting numbers
 | 
						|
(default
 | 
						|
.BR "%.6g" )
 | 
						|
.TP
 | 
						|
.B FS
 | 
						|
regular expression used to separate fields; also settable
 | 
						|
by option
 | 
						|
.BI \-F fs\fR.
 | 
						|
.TP
 | 
						|
.BR NF
 | 
						|
number of fields in the current record
 | 
						|
.TP
 | 
						|
.B NR
 | 
						|
ordinal number of the current record
 | 
						|
.TP
 | 
						|
.B FNR
 | 
						|
ordinal number of the current record in the current file
 | 
						|
.TP
 | 
						|
.B FILENAME
 | 
						|
the name of the current input file
 | 
						|
.TP
 | 
						|
.B RS
 | 
						|
input record separator (default newline)
 | 
						|
.TP
 | 
						|
.B OFS
 | 
						|
output field separator (default blank)
 | 
						|
.TP
 | 
						|
.B ORS
 | 
						|
output record separator (default newline)
 | 
						|
.TP
 | 
						|
.B OFMT
 | 
						|
output format for numbers (default
 | 
						|
.BR "%.6g" )
 | 
						|
.TP
 | 
						|
.B SUBSEP
 | 
						|
separates multiple subscripts (default 034)
 | 
						|
.TP
 | 
						|
.B ARGC
 | 
						|
argument count, assignable
 | 
						|
.TP
 | 
						|
.B ARGV
 | 
						|
argument array, assignable;
 | 
						|
non-null members are taken as filenames
 | 
						|
.TP
 | 
						|
.B ENVIRON
 | 
						|
array of environment variables; subscripts are names.
 | 
						|
.PD
 | 
						|
.PP
 | 
						|
Functions may be defined (at the position of a pattern-action statement) thus:
 | 
						|
.IP
 | 
						|
.B
 | 
						|
function foo(a, b, c) { ...; return x }
 | 
						|
.PP
 | 
						|
Parameters are passed by value if scalar and by reference if array name;
 | 
						|
functions may be called recursively.
 | 
						|
Parameters are local to the function; all other variables are global.
 | 
						|
Thus local variables may be created by providing excess parameters in
 | 
						|
the function definition.
 | 
						|
.SH EXAMPLES
 | 
						|
.TP
 | 
						|
.EX
 | 
						|
length($0) > 72
 | 
						|
.EE
 | 
						|
Print lines longer than 72 characters.
 | 
						|
.TP
 | 
						|
.EX
 | 
						|
{ print $2, $1 }
 | 
						|
.EE
 | 
						|
Print first two fields in opposite order.
 | 
						|
.PP
 | 
						|
.EX
 | 
						|
BEGIN { FS = ",[ \et]*|[ \et]+" }
 | 
						|
      { print $2, $1 }
 | 
						|
.EE
 | 
						|
.ns
 | 
						|
.IP
 | 
						|
Same, with input fields separated by comma and/or blanks and tabs.
 | 
						|
.PP
 | 
						|
.EX
 | 
						|
.nf
 | 
						|
	{ s += $1 }
 | 
						|
END	{ print "sum is", s, " average is", s/NR }
 | 
						|
.fi
 | 
						|
.EE
 | 
						|
.ns
 | 
						|
.IP
 | 
						|
Add up first column, print sum and average.
 | 
						|
.TP
 | 
						|
.EX
 | 
						|
/start/, /stop/
 | 
						|
.EE
 | 
						|
Print all lines between start/stop pairs.
 | 
						|
.PP
 | 
						|
.EX
 | 
						|
.nf
 | 
						|
BEGIN	{	# Simulate echo(1)
 | 
						|
	for (i = 1; i < ARGC; i++) printf "%s ", ARGV[i]
 | 
						|
	printf "\en"
 | 
						|
	exit }
 | 
						|
.fi
 | 
						|
.EE
 | 
						|
.SH SEE ALSO
 | 
						|
.IR lex (1), 
 | 
						|
.IR sed (1)
 | 
						|
.br
 | 
						|
A. V. Aho, B. W. Kernighan, P. J. Weinberger,
 | 
						|
.I
 | 
						|
The AWK Programming Language,
 | 
						|
Addison-Wesley, 1988.  ISBN 0-201-07981-X
 | 
						|
.SH BUGS
 | 
						|
There are no explicit conversions between numbers and strings.
 | 
						|
To force an expression to be treated as a number add 0 to it;
 | 
						|
to force it to be treated as a string concatenate
 | 
						|
\&\f(CW""\fP to it.
 | 
						|
.br
 | 
						|
The scope rules for variables in functions are a botch;
 | 
						|
the syntax is worse.
 |