cppparser: Add preprocessor option (-E) to parse_file

This makes it easier to test the behaviour of the preprocessor
This commit is contained in:
rdb 2024-03-28 17:15:24 +01:00
parent a769808af0
commit fb68f82c5b
5 changed files with 721 additions and 7 deletions

View File

@ -232,6 +232,59 @@ CPPPreprocessor() {
_verbose = 1;
}
/**
 * Runs the named file through the preprocessor and writes the resulting token
 * stream to standard output, formatted as code.  A newline is started
 * whenever a token's line number is greater than that of the output so far,
 * and each new line is indented by two spaces per currently open '{'.
 *
 * If the file was seen before and carried #pragma once, it is not read again;
 * it is only re-marked as a local source and true is returned.
 *
 * Returns false if the file could not be read or if any errors were reported,
 * true otherwise.
 */
bool CPPPreprocessor::
preprocess_file(const Filename &filename) {
  Filename canonical(filename);
  canonical.make_canonical();

  CPPFile file(canonical, filename, CPPFile::S_local);

  // A previously included file with #pragma once must not be read again, but
  // it does get flagged as local now.
  auto found = _parsed_files.find(file);
  if (found != _parsed_files.end() && found->_pragma_once) {
    found->_source = CPPFile::S_local;
    return true;
  }

  if (!init_cpp(file)) {
    std::cerr << "Unable to read " << filename << "\n";
    return false;
  }

  int current_line = 1;
  int depth = 0;
  bool space_pending = false;

  for (CPPToken token = get_next_token();
       !token.is_eof();
       token = get_next_token()) {
    const int kind = token._token;

    // A closing brace is already printed at the outer indentation level.
    if (kind == '}') {
      --depth;
    }

    if (token._lloc.first_line > current_line) {
      // This token lives further down in the source; begin a new line.
      std::cout << "\n";
      current_line = token._lloc.first_line;
      indent(std::cout, depth * 2);
    }
    else if (space_pending) {
      switch (kind) {
      case ';':
      case ':':
      case ',':
      case ')':
        // These tokens never need a preceding space.
        break;

      default:
        std::cout << " ";
        break;
      }
    }

    if (kind == '{') {
      ++depth;
    }

    // No space is wanted directly after an opening parenthesis or a tilde.
    space_pending = !(kind == '(' || kind == '~');

    token.output_code(std::cout);
  }

  std::cout << "\n";
  return get_error_count() == 0;
}
/**
* Sets the verbosity level of the parser. At 0, no warnings will be
* reported; at 1 or higher, expect to get spammed.
@ -353,8 +406,8 @@ get_next_token0() {
ident->_names.back().set_templ
(nested_parse_template_instantiation(decl->get_template_scope()));
token = internal_get_next_token();
} else {
error(string("unknown template '") + ident->get_fully_scoped_name() + "'", loc);
//} else {
// error(string("unknown template '") + ident->get_fully_scoped_name() + "'", loc);
}
}

View File

@ -41,6 +41,8 @@ class CPPPreprocessor {
public:
CPPPreprocessor();
bool preprocess_file(const Filename &filename);
void set_verbose(int verbose);
int get_verbose() const;

View File

@ -15,6 +15,7 @@
#include "cppExpression.h"
#include "cppIdentifier.h"
#include "cppBison.h"
#include "pdtoa.h"
#include <ctype.h>
@ -85,7 +86,6 @@ is_eof() const {
return _token == 0;
}
/**
*
*/
@ -683,3 +683,648 @@ output(std::ostream &out) const {
}
}
}
/**
*
*/
void CPPToken::
output_code(std::ostream &out) const {
switch (_token) {
case REAL:
{
char buffer[128];
pdtoa(_lval.u.real, buffer);
out << buffer;
}
break;
case INTEGER:
out << _lval.u.integer;
break;
case CHAR_TOK:
case SIMPLE_STRING:
out << (_token == CHAR_TOK ? '\'' : '"');
for (char c : _lval.str) {
switch (c) {
case '\n':
out << "\\n";
break;
case '\t':
out << "\\t";
break;
case '\r':
out << "\\r";
break;
case '\a':
out << "\\a";
break;
case '\b':
out << "\\b";
break;
case '\v':
out << "\\v";
break;
case '\f':
out << "\\f";
break;
case '\'':
out << "\\\'";
break;
case '\\':
out << "\\\\";
break;
default:
if (isprint(c)) {
out << c;
} else {
out << '\\' << std::oct << std::setw(3) << std::setfill('0') << (int)(c)
<< std::dec << std::setw(0);
}
break;
}
}
out << (_token == CHAR_TOK ? '\'' : '"');
break;
case STRING_LITERAL:
out << *_lval.u.expr;
break;
case SIMPLE_IDENTIFIER:
out << _lval.str;
break;
case IDENTIFIER:
out << *_lval.u.identifier;
break;
case TYPENAME_IDENTIFIER:
out << *_lval.u.identifier;
break;
case SCOPING:
out << *_lval.u.identifier << "::";
break;
case TYPEDEFNAME:
out << _lval.str;
break;
case ELLIPSIS:
out << "...";
break;
case OROR:
out << "||";
break;
case ANDAND:
out << "&&";
break;
case EQCOMPARE:
out << "==";
break;
case NECOMPARE:
out << "!=";
break;
case LECOMPARE:
out << "<=";
break;
case GECOMPARE:
out << ">=";
break;
case LSHIFT:
out << "<<";
break;
case RSHIFT:
out << ">>";
break;
case POINTSAT_STAR:
out << "->*";
break;
case DOT_STAR:
out << ".*";
break;
case UNARY_NOT:
out << "!";
break;
case UNARY_MINUS:
out << "-";
break;
case UNARY_PLUS:
out << "+";
break;
case UNARY_NEGATE:
out << "~";
break;
case UNARY_STAR:
out << "*";
break;
case UNARY_REF:
out << "&";
break;
case POINTSAT:
out << "->";
break;
case SCOPE:
out << "::";
break;
case PLUSPLUS:
out << "++";
break;
case MINUSMINUS:
out << "--";
break;
case TIMESEQUAL:
out << "*=";
break;
case DIVIDEEQUAL:
out << "/=";
break;
case MODEQUAL:
out << "%=";
break;
case PLUSEQUAL:
out << "+=";
break;
case MINUSEQUAL:
out << "-=";
break;
case OREQUAL:
out << "|=";
break;
case ANDEQUAL:
out << "&=";
break;
case XOREQUAL:
out << "^=";
break;
case LSHIFTEQUAL:
out << "<<=";
break;
case RSHIFTEQUAL:
out << ">>=";
break;
case ATTR_LEFT:
out << "[[";
break;
case ATTR_RIGHT:
out << "]]";
break;
case KW_ALIGNAS:
out << "alignas";
break;
case KW_ALIGNOF:
out << "alignof";
break;
case KW_AUTO:
out << "auto";
break;
case KW_BEGIN_PUBLISH:
out << "__begin_publish";
break;
case KW_BLOCKING:
out << "__blocking";
break;
case KW_BOOL:
out << "bool";
break;
case KW_CATCH:
out << "catch";
break;
case KW_CHAR:
out << "char";
break;
case KW_CHAR8_T:
out << "char8_t";
break;
case KW_CHAR16_T:
out << "char16_t";
break;
case KW_CHAR32_T:
out << "char32_t";
break;
case KW_CLASS:
out << "class";
break;
case KW_CONST:
out << "const";
break;
case KW_CONSTEVAL:
out << "consteval";
break;
case KW_CONSTEXPR:
out << "constexpr";
break;
case KW_CONSTINIT:
out << "constinit";
break;
case KW_CONST_CAST:
out << "const_cast";
break;
case KW_DECLTYPE:
out << "decltype";
break;
case KW_DEFAULT:
out << "default";
break;
case KW_DELETE:
out << "delete";
break;
case KW_DOUBLE:
out << "double";
break;
case KW_DYNAMIC_CAST:
out << "dynamic_cast";
break;
case KW_ELSE:
out << "else";
break;
case KW_ENUM:
out << "enum";
break;
case KW_EXPLICIT:
out << "explicit";
break;
case KW_EXPLICIT_LPAREN:
out << "explicit(";
break;
case KW_EXTENSION:
out << "__extension";
break;
case KW_EXTERN:
out << "extern";
break;
case KW_FALSE:
out << "false";
break;
case KW_FINAL:
out << "final";
break;
case KW_FLOAT:
out << "float";
break;
case KW_FRIEND:
out << "friend";
break;
case KW_FOR:
out << "for";
break;
case KW_GOTO:
out << "goto";
break;
case KW_HAS_VIRTUAL_DESTRUCTOR:
out << "__has_virtual_destructor";
break;
case KW_IF:
out << "if";
break;
case KW_INLINE:
out << "inline";
break;
case KW_INT:
out << "int";
break;
case KW_IS_ABSTRACT:
out << "__is_abstract";
break;
case KW_IS_BASE_OF:
out << "__is_base_of";
break;
case KW_IS_CLASS:
out << "__is_class";
break;
case KW_IS_CONSTRUCTIBLE:
out << "__is_constructible";
break;
case KW_IS_CONVERTIBLE_TO:
out << "__is_convertible_to";
break;
case KW_IS_DESTRUCTIBLE:
out << "__is_destructible";
break;
case KW_IS_EMPTY:
out << "__is_empty";
break;
case KW_IS_ENUM:
out << "__is_enum";
break;
case KW_IS_FINAL:
out << "__is_final";
break;
case KW_IS_FUNDAMENTAL:
out << "__is_fundamental";
break;
case KW_IS_POD:
out << "__is_pod";
break;
case KW_IS_POLYMORPHIC:
out << "__is_polymorphic";
break;
case KW_IS_STANDARD_LAYOUT:
out << "__is_standard_layout";
break;
case KW_IS_TRIVIAL:
out << "__is_trivial";
break;
case KW_IS_TRIVIALLY_COPYABLE:
out << "__is_trivially_copyable";
break;
case KW_IS_UNION:
out << "__is_union";
break;
case KW_LONG:
out << "long";
break;
case KW_MAKE_MAP_KEYS_SEQ:
out << "__make_map_keys_seq";
break;
case KW_MAKE_MAP_PROPERTY:
out << "__make_map_property";
break;
case KW_MAKE_PROPERTY:
out << "__make_property";
break;
case KW_MAKE_PROPERTY2:
out << "__make_property2";
break;
case KW_MAKE_SEQ:
out << "__make_seq";
break;
case KW_MAKE_SEQ_PROPERTY:
out << "__make_seq_property";
break;
case KW_MUTABLE:
out << "mutable";
break;
case KW_NAMESPACE:
out << "namespace";
break;
case KW_NEW:
out << "new";
break;
case KW_NOEXCEPT:
out << "noexcept";
break;
case KW_NOEXCEPT_LPAREN:
out << "noexcept(";
break;
case KW_NULLPTR:
out << "nullptr";
break;
case KW_OPERATOR:
if (_lval.u.identifier != nullptr) {
out << *_lval.u.identifier << "::";
}
out << "operator";
break;
case KW_OVERRIDE:
out << "override";
break;
case KW_PRIVATE:
out << "private";
break;
case KW_PROTECTED:
out << "protected";
break;
case KW_PUBLIC:
out << "public";
break;
case KW_PUBLISHED:
out << "__published";
break;
case KW_REGISTER:
out << "register";
break;
case KW_REINTERPRET_CAST:
out << "reinterpret_cast";
break;
case KW_RETURN:
out << "return";
break;
case KW_SHORT:
out << "short";
break;
case KW_SIGNED:
out << "signed";
break;
case KW_SIZEOF:
out << "sizeof";
break;
case KW_STATIC:
out << "static";
break;
case KW_STATIC_ASSERT:
out << "static_assert";
break;
case KW_STATIC_CAST:
out << "static_cast";
break;
case KW_STRUCT:
out << "struct";
break;
case KW_TEMPLATE:
out << "template";
break;
case KW_THREAD_LOCAL:
out << "thread_local";
break;
case KW_THROW:
out << "throw";
break;
case KW_TRUE:
out << "true";
break;
case KW_TRY:
out << "try";
break;
case KW_TYPEDEF:
out << "typedef";
break;
case KW_TYPEID:
out << "typeid";
break;
case KW_TYPENAME:
out << "typename";
break;
case KW_UNDERLYING_TYPE:
out << "__underlying_type";
break;
case KW_USING:
out << "using";
break;
case KW_UNION:
out << "union";
break;
case KW_UNSIGNED:
out << "unsigned";
break;
case KW_VIRTUAL:
out << "virtual";
break;
case KW_VOID:
out << "void";
break;
case KW_VOLATILE:
out << "volatile";
break;
case KW_WCHAR_T:
out << "wchar_t";
break;
case KW_WHILE:
out << "while";
break;
case START_CPP:
case START_CONST_EXPR:
case START_TYPE:
break;
default:
if (_token < 128 && isprint(_token)) {
out << (char)_token;
} else {
out << "<token " << _token << ">\n";
}
}
}

View File

@ -37,6 +37,7 @@ public:
bool is_eof() const;
void output(std::ostream &out) const;
void output_code(std::ostream &out) const;
int _token;
YYSTYPE _lval;

View File

@ -208,11 +208,12 @@ int
main(int argc, char **argv) {
extern char *optarg;
extern int optind;
const char *optstr = "I:S:D:o:l:vp";
const char *optstr = "I:S:D:o:l:vpE";
preprocess_argv(argc, argv);
parser.set_verbose(2);
bool prompt = false;
bool preprocess = false;
int flag = getopt(argc, argv, optstr);
@ -249,6 +250,10 @@ main(int argc, char **argv) {
prompt = true;
break;
case 'E':
preprocess = true;
break;
default:
exit(1);
}
@ -268,15 +273,23 @@ main(int argc, char **argv) {
<< " -D manifest_name=manifest_definition\n"
<< " -o output_file (ignored)\n"
<< " -v (increase verbosity)\n"
<< " -E (output preprocessed token stream)\n"
<< " -p (prompt for expression instead of dumping output)\n";
exit(1);
}
for (int i = 1; i < argc; i++) {
if (!parser.parse_file(argv[i])) {
cerr << "Error in parsing.\n";
exit(1);
if (preprocess) {
if (!parser.preprocess_file(argv[i])) {
cerr << "Error in preprocessing.\n";
exit(1);
}
} else {
if (!parser.parse_file(argv[i])) {
cerr << "Error in parsing.\n";
exit(1);
}
}
}