From ef39ac7999b02679fe25d85ca0fd1385b76262a1 Mon Sep 17 00:00:00 2001 From: mes5k Date: Thu, 28 Oct 2004 02:44:41 +0000 Subject: [PATCH] manual as docbook --- docs/manual.xml | 797 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 797 insertions(+) create mode 100644 docs/manual.xml diff --git a/docs/manual.xml b/docs/manual.xml new file mode 100644 index 0000000..f65b499 --- /dev/null +++ b/docs/manual.xml @@ -0,0 +1,797 @@ + + + + + + + +Templatized C++ Command Line Parser Manual + + Michael + Smoot + E + + + 2003,2004 + Michael E. Smoot + + + + +Basic Usage + +TCLAP has a few key classes to be aware of. The first is the +CmdLine (command line) class. This class parses +the command line passed to it according to the arguments that it +contains. Arguments are separate objects that are added to the +CmdLine object one at a time. The five +argument classes are: ValueArg, UnlabeledValueArg, +SwitchArg, MultiArg and UnlabeledMultiArg. +These classes are templatized, which means they can be defined to parse +a value of any type**. Once you add the +arguments to the CmdLine object, it parses the command line +and assigns the data it finds to the specific argument objects it +contains. Your program accesses the values parsed by +calls to the getValue() methods of the argument objects. + + + +Here is a simple example ... + + + +#include <string> +#include <iostream> +#include <algorithm> +#include <tclap/CmdLine.h> + +using namespace TCLAP; +using namespace std; + +int main(int argc, char** argv) +{ + // Wrap everything in a try block. Do this every time, + // because exceptions will be thrown for problems. + try { + + // Define the command line object. + CmdLine cmd("Command description message", ' ', "0.9"); + + // Define a value argument and add it to the command line. + ValueArg<string> nameArg("n","name","Name to print",true,"homer","string"); + cmd.add( nameArg ); + + // Define a switch and add it to the command line. 
+ SwitchArg reverseSwitch("r","reverse","Print name backwards", false); + cmd.add( reverseSwitch ); + + // Parse the args. + cmd.parse( argc, argv ); + + // Get the value parsed by each arg. + string name = nameArg.getValue(); + bool reverseName = reverseSwitch.getValue(); + + // Do what you intend to... + if ( reverseName ) + { + reverse(name.begin(),name.end()); + cout << "My name (spelled backwards) is: " << name << endl; + } + else + cout << "My name is: " << name << endl; + + + } catch (ArgException &e) // catch any exceptions + { cerr << "error: " << e.error() << " for arg " << e.argId() << endl; } +} + + + +The output should look like: + + + + + +% test1 -n mike +My name is: mike + +% test1 -n mike -r +My name (spelled backwards) is: ekim + +% test1 -r -n mike +My name (spelled backwards) is: ekim + +% test1 -r +PARSE ERROR: + One or more required arguments missing! + +Brief USAGE: + test1 [-r] -n <string> [--] [-v] [-h] + +For complete USAGE and HELP type: + test1 --help + + +% test1 --help + +USAGE: + + test1 [-r] -n <string> [--] [-v] [-h] + + +Where: + + -r, --reverse + Print name backwards + + -n <string> --name <string> + (required) (value required) Name to print + + --, --ignore_rest + Ignores the rest of the labeled arguments following this flag. + + -v, --version + Displays version information and exits. + + -h, --help + Displays usage information and exits. + + + Command description message + + + + + + +This example shows a number of different properties of the +library... + +Arguments can appear in any order (...mostly, more on this later). + +The help, version and -- SwitchArgs +are specified automatically. Using either the -h or +--help flag will cause the USAGE message to be displayed, +-v or --version will cause any version information to +be displayed, and -- or --ignore_rest will cause the +remaining labeled arguments to be ignored. 
These switches are +included automatically on every command line and there is no way to +turn this off (unless you change CmdLine.h yourself). More + later on how we get this to +work. + +If there is an error parsing the command line (e.g. a required +argument isn't provided), the program exits and displays a brief +USAGE and an error message. + +The program name is assumed to always be argv[0], so it isn't +specified directly. + +A delimiter character can be specified. This means that if you +prefer arguments of the style "-s=asdf" instead of "-s asdf", you +can do so. + +Always wrap everything in a try block that catches +ArgExceptions! Any problems found in constructing the +CmdLine or the Args will throw an +ArgException. + + + + + +Argument Properties +Arguments, whatever their type, have a few common basic properties. +These properties are set in the constructors of the arguments. + +First is the flag or the character preceded by a dash(-) that +signals the beginning of the argument on the command line. +Arguments also have names, which can, if desired, also be used +as a flag on the command line, this time preceded by two dashes +(--) [like the familiar getopt_long()]. +Next is the description of the argument. This is a short +description of the argument displayed in the help/usage message +when needed. +The boolean value in ValueArgs indicates whether the +argument is required to be present (SwitchArgs can't be +required, as that would defeat the purpose). +Next, the default value the arg should assume if the arg isn't +required or entered on the command line. +Last, for ValueArgs, is a short description of the type +that the argument expects (yes, it's an ugly hack). Note that the order of +arguments on the command line (so far) doesn't matter. Any argument +not matching an Arg added to the command line will cause an +exception to be thrown (for the +most part, with some exceptions). 
+ + + +Types of Arguments +There are two primary types of arguments: + +SwitchArgs are what the name implies: simple, on/off, +boolean switches. Use SwitchArgs anytime you want to turn +some sort of system property on or off. SwitchArgs don't +parse a value. They return true or false, depending +on whether the switch has been found on the command line and what +the default value was defined as. + +ValueArgs are arguments that read a value of some type +from the command line. Any time you need a file name, a number, +etc. use a ValueArg or one of its variants. UnlabeledValueArg, MultiArg, and UnlabeledMultiArg are +special cases of ValueArgs and are described below. All +ValueArgs are templatized** and will attempt to parse +the string its flag matches on the command line as the type it is +specified as. ValueArg<int> will attempt to parse an +int, ValueArg<float> will attempt to parse a float, +etc. If operator>> for the specified type doesn't +recognize the string on the command line as its defined type, then +an exception will be thrown. + + + + +Compiling + +TCLAP is implemented entirely in header files which means you only +need to include CmdLine.h to use the library. + + + #include <tclap/CmdLine.h> + + +You'll need to make sure that your compiler can see the header +files. If you do the usual "make install" then your compiler should +see the files by default. Alternatively, you can use the -I +compiler argument to specify the exact location of the libraries. + + + c++ -o my_program -I /some/place/tclap-1.X/include my_program.cpp + + +Where /some/place/tclap-1.X is the place you have unpacked the +distribution. + + + +Finally, if you want to include TCLAP as part of your software +(which is perfectly OK, even encouraged) then simply copy the +contents of /some/place/tclap-1.X/include (the tclap directory and +all of the header files it contains) into your include +directory. + + + +TCLAP was developed on Linux and MacOSX systems. 
It is also known +to work on Windows, Sun and Alpha platforms. We've made every +effort to keep the library compliant with the ANSI C++ standard so +if your compiler meets the standard, then this library should work +for you. Please let us know if this is not the case! + + + + + +Complications + +Naturally, what we have seen to this point doesn't satisfy all of +our needs. + + + +I want to combine multiple switches into one argument... + +Multiple SwitchArgs can be combined into a single argument +on the command line. If you have switches -a, -b and -c it is valid +to do any of the following: + + + % command -a -b -c + + + +or + + + + % command -abc + + + +or + + + + % command -ba -c + + +This is to make this library more in line with the POSIX and GNU +standards (as I understand them). + + + + +I tried passing multiple values on the command line with the +same flag and it didn't work... + +Correct. You cannot specify multiple ValueArgs or +SwitchArgs with the same flag, either in the code or on the command +line. Exceptions will occur in either case. For SwitchArgs +it simply doesn't make sense to allow a particular flag to be +turned on or off repeatedly on the command line. All you should +ever need is to set your state once by specifying the flag +or not ( yeah but...). + + + +However, there are situations where you might want multiple +values for the same flag to be specified. Imagine a compiler that +allows you to specify multiple directories to search for +libraries... + + + + % fooCompiler -L /dir/num1 -L /dir/num2 file.foo + + + +In situations like this, you will want to use a MultiArg. A +MultiArg is essentially a ValueArg that appends any +value that it matches and parses onto a vector of values. When the +getValue() method is called, a vector of values, instead of +a single value, is returned. 
A MultiArg is declared much like +a ValueArg: + + + + + MultiArg<int> itest("i", "intTest", "multi int test", false,"int" ); + cmd.add( itest ); + + + +Note that MultiArgs can be added to the CmdLine in +any order (unlike UnlabeledMultiArg). + + + + + +I don't like labelling all of my arguments... + +To this point all of our arguments have had labels (flags) +identifying them on the command line, but there are some +situations where flags are burdensome and not worth the effort. One +example might be if you want to implement a magical command we'll +call copy. All copy does is copy the file specified +in the first argument to the file specified in the second argument. +We can do this using UnlabeledValueArgs which are pretty +much just ValueArgs without the flag specified, which tells +the CmdLine object to treat them accordingly. The code would +look like this: + + + + + UnlabeledValueArg<float> nolabel( "name", "unlabeled test", 3.14, + "nameString" ); + cmd.add( nolabel ); + + + +Everything else is handled identically to what is seen above. The +only difference to be aware of, and this is important: the order +that UnlabeledValueArgs are added to the CmdLine is the +order that they will be parsed!!!! This is not the case +for normal SwitchArgs and ValueArgs. What happens +internally is the first argument that the CmdLine doesn't +recognize is assumed to be the first UnlabeledValueArg and +is parsed as such. Note that you are allowed to intersperse labeled +args (SwitchArgs and ValueArgs) in between +UnlabeledValueArgs (either on the command line or in the +declaration), but the UnlabeledValueArgs will still be +parsed in the order they are added. Just remember that order is +important for unlabeled arguments. + + + + + +I want an arbitrary number of unlabeled arguments to be accepted... + + +Don't worry, we've got you covered. 
Say you want a strange command +that searches each file specified for a given string (let's call it +grep), but you don't want to have to type in all of the file +names or write a script to do it for you. Say, + + + + + % grep pattern *.txt + + + + +First remember that the * is handled by the shell and +expanded accordingly, so what the program grep sees is +really something like: + + % grep pattern file1.txt file2.txt fileZ.txt + +To handle situations where multiple, unlabeled arguments are needed, +we provide the UnlabeledMultiArg. UnlabeledMultiArgs +are declared much like everything else, but with only a description +of the arguments. By default, if an UnlabeledMultiArg is +specified, then at least one is required to be present or an +exception will be thrown. The most important thing to remember is +that, as with UnlabeledValueArgs, order matters! In fact, an UnlabeledMultiArg must be the last argument added to the +CmdLine! Here is what a declaration looks like: + + + + + + // + // UnlabeledMultiArg must be the LAST argument added! + // + UnlabeledMultiArg<string> multi("file names"); + cmd.add( multi ); + cmd.parse(argc, argv); + + vector<string> fileNames = multi.getValue(); + + + + + +You must only ever specify one (1) UnlabeledMultiArg. One +UnlabeledMultiArg will read every unlabeled Arg that wasn't +already processed by an UnlabeledValueArg into a +vector of type T. Any UnlabeledValueArg or other +UnlabeledMultiArg specified after the first +UnlabeledMultiArg will be ignored, and if they are required, +exceptions will be thrown. When you call the getValue() +method of the UnlabeledMultiArg argument, a vector +will be returned. If you can imagine a situation where there will +be multiple args of multiple types (strings, ints, floats, etc.) +then just declare the UnlabeledMultiArg as type +string and parse the different values yourself or use +several UnlabeledValueArgs. + + + + + +I want one argument or the other, but not both... 
+ +Suppose you have a command that must read input from one of two +possible locations, either a local file or a URL. The command +must read something, so one argument is required, but +not both, yet neither argument is strictly necessary by itself. +This is called "exclusive or" or "XOR". To accommodate this +situation, there is now an option to add two or more Args to +a CmdLine that are exclusively or'd with one another: +xorAdd(). This means that exactly one of the Args must be +set and no more. + + + +xorAdd() comes in two flavors: xorAdd(Arg& a, Arg& +b) to add just two Args to be xor'd and xorAdd( vector<Arg*> +xorList ) to add more than two Args. + + + + + ValueArg<string> fileArg("f","file","File name to read",true,"homer", + "filename"); + ValueArg<string> urlArg("u","url","URL to load",true, + "http://example.com", "URL"); + + cmd.xorAdd( fileArg, urlArg ); + cmd.parse(argc, argv); + + + +Once one Arg in the xor list is matched on the +CmdLine then the others in the xor list will be marked as +set. The question, then, is how to determine which of the +Args has been set. This is accomplished by calling the +isSet() method for each Arg. If the Arg has been +matched on the command line, then isSet() will return TRUE, +whereas if the Arg has been set as a result of matching the +other Arg that was xor'd, isSet() will return FALSE. +(Of course, if the Arg was not xor'd and wasn't matched, it +will also return FALSE.) + + + + + + + if ( fileArg.isSet() ) + readFile( fileArg.getValue() ); + else if ( urlArg.isSet() ) + readURL( urlArg.getValue() ); + else + // Should never get here because TCLAP will note that one of the + // required args above has not been set. + throw("Very bad things..."); + + + + + + + + +I have more arguments than single flags make sense for... + +Some commands have so many options that single flags no longer map +sensibly to the available options. In this case, it is desirable to +specify Args using only long options. 
This one is easy to +accomplish, just make the flag value blank in the Arg +constructor. This will tell the Arg that only the long +option should be matched and will force users to specify the long +option on the command line. The help output is updated accordingly. + + + + + + ValueArg<string> fileArg("","file","File name",true,"homer","filename"); + + SwitchArg caseSwitch("","upperCase","Print in upper case",false); + + + + + + + + +I want to constrain the values allowed for a particular argument... + +There are now constructors for all of the Args that parse +values that allow a list of values to be specified for that +particular Arg. When the value for the Arg is parsed, +it is checked against the list of values specified in the +constructor. If the value is in the list then it is accepted. If +not, then an exception is thrown. Here is a simple example: + + + + vector<string> allowed; + allowed.push_back("homer"); + allowed.push_back("marge"); + allowed.push_back("bart"); + allowed.push_back("lisa"); + allowed.push_back("maggie"); + + ValueArg<string> nameArg("n","name","Name to print",true,"homer",allowed); + cmd.add( nameArg ); + + + +Instead of a type description being specified in the Arg, a +type description is created by concatenating the values in the +allowed list using the operator<< for the specified type. The +help/usage for the Arg therefore lists the allowable values. +Because of this, it is assumed that list should be relatively +small, although there is no limit on this. + + + +Obviously, a list of allowed values isn't always the best way to +constrain things. For instance, one might wish to allow only +integers greater than 0. In this case, the best strategy is for you +to evaluate the value returned from the getValue() call and if it +isn't valid, throw an ArgException. Be sure that the +description provided with the Arg reflects the constraint +you choose. + + + + + +I want the Args to add themselves to the CmdLine... 
+ +New constructors have been added for each Arg that take a +CmdLine object as an argument. Each Arg then +adds itself to the CmdLine object. There is no +difference in how the Arg is handled between this method and +calling the add() method directly. At the moment, there is +no way to do an xorAdd() from the constructor. Here is an +example: + + + + // Create the command line. + CmdLine cmd("this is a message", '=', "0.99" ); + + // Note that the following args take the "cmd" object as arguments. + SwitchArg btest("B","existTestB", "exist Test B", false, cmd ); + + ValueArg<string> stest("s", "stringTest", "string test", true, "homer", + "string", cmd ); + + UnlabeledValueArg<string> utest("unTest1","unlabeled test one", + "default","string", cmd ); + + // NO add() calls! + + // Parse the command line. + cmd.parse(argc,argv); + + + + + + +Exceptions to the Rules + +Like all good rules, there are many exceptions.... + + + +Ignoring arguments + +The -- flag is automatically included in the CmdLine. +As (almost) per POSIX and GNU standards, any argument specified +after the -- flag is ignored. Almost because if an +UnlabeledValueArg that has not been set or an +UnlabeledMultiArg has been specified, by default we will +assign any arguments beyond the -- to those arguments as +per the rules above. This is primarily useful if you want to pass +in arguments with a dash as the first character of the argument. It +should be noted that even if the -- flag is passed on the +command line, the CmdLine will still test to make +sure all of the required arguments are present. + + + +Of course, this isn't how POSIX/GNU handle things; they explicitly +ignore arguments after the --. To accommodate this, we can +make both UnlabeledValueArgs and UnlabeledMultiArgs +ignorable in their constructors. See the +API Documentation for details. 
+ + + + + +Multiple Identical Switches + +If you absolutely must allow for multiple, identical switches, then +don't use a SwitchArg, instead use a MultiArg of type +bool. This means you'll need to specify a 1 or 0 on the +command line with the switch (as values are required), but this +should allow you to turn your favorite switch on and off to your +heart's content. + + + + +Type Descriptions + +Ideally this library would use RTTI to return a human readable name +of the type declared for a particular argument. Unfortunately, at +least for g++, the names returned aren't particularly useful. + + + + + +Visitors + + +Disclaimer: Almost no one will have any use for Visitors, they were +added to provide special handling for default arguments. Nothing +that Visitors do couldn't be accomplished by the user after the +command line has been parsed. If you're still interested, keep +reading... + + + +Some of you may be wondering how we get the --help, +--version and -- arguments to do their thing without +mucking up the CmdLine code with lots of if +statements and type checking. This is accomplished by using a +variation on the Visitor Pattern. Actually, it may not be a Visitor +Pattern at all, but that's what inspired me. + + + +If we want some argument to do some sort of special handling, +besides simply parsing a value, then we add a Visitor +pointer to the Arg. More specifically, we add a +subclass of the Visitor class. Once the argument has +been successfully parsed, the Visitor for that argument is +called. Any data that needs to be operated on is declared in the +Visitor constructor and then operated on in the +visit() method. A Visitor is added to an Arg +as the last argument in its declaration. This may sound +complicated, but it is pretty straightforward. Let's see an +example. + + + +Say you want to add an --authors flag to a program that +prints the names of the authors when present. 
First subclass +Visitor: + +#include "Visitor.h" +#include <string> +#include <iostream> + +class AuthorVisitor : public Visitor +{ + protected: + string _author; + public: + AuthorVisitor(const string& name ) : Visitor(), _author(name) {} ; + void visit() { cout << "AUTHOR: " << _author << endl; exit(0); }; +}; + +Now include this class definition somewhere and go about creating +your command line. When you create the author switch, add the +AuthorVisitor pointer as follows: + + + SwitchArg author("a","author","Prints author name", false, + new AuthorVisitor("Homer J. Simpson") ); + cmd.add( author ); + + +Now, any time the -a or --author flag is specified, +the program will print the author name, Homer J. Simpson and exit +without processing any further (as specified in the visit() +method). + + + + + +More Information + +For more information, look at the API +Documentation and the examples included with the +distribution. + + + +Happy coding! + + + +** In theory, any type that supports operator>> and +operator<< should work, although I've really only tried +things with basic types like int, float, string, etc. + + +