diff --git a/.gitignore b/.gitignore index ae3a14b801..96ec648af8 100644 --- a/.gitignore +++ b/.gitignore @@ -152,3 +152,5 @@ vlib/v/tests/*.js # ignore .NET7.0 Assembly Files bench/vectors/bin bench/vectors/obj + +autofuzz.log diff --git a/cmd/tools/fuzz/fuzz_v_parser_with_radamsa.sh b/cmd/tools/fuzz/fuzz_v_parser_with_radamsa.sh new file mode 100755 index 0000000000..1717eed4f7 --- /dev/null +++ b/cmd/tools/fuzz/fuzz_v_parser_with_radamsa.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +## Note: radamsa is a fuzzer, available from https://gitlab.com/akihe/radamsa + +## ./v -g cmd/tools/measure/parser_speed.v + +while true; do + radamsa --meta autofuzz.log examples/hello_world.v > x.v; + VFUZZER=true cmd/tools/measure/parser_speed x.v || break; +done diff --git a/cmd/tools/measure/file_lists/expand.v b/cmd/tools/measure/file_lists/expand.v new file mode 100644 index 0000000000..b9fc600b4a --- /dev/null +++ b/cmd/tools/measure/file_lists/expand.v @@ -0,0 +1,29 @@ +module file_lists + +import os + +// expand_files returns the existing files from `files`, plus all the .vsh and .v files found in the folders listed in `files`; empty and non existing entries are skipped. +// An entry starting with `@` names a list file: each of its lines is expanded as another file/folder/`@list` entry; a list file that can not be read is an error. +pub fn expand_files(files []string) ![]string { + mut res := []string{} + for file in files { + if file == '' { + continue + } + if file.starts_with('@') { + lst_path := file.all_after('@').trim_space() + listed_files := os.read_file(lst_path)!.split('\n').map(it.trim_space()) + res << expand_files(listed_files)! 
+ continue + } + if os.is_dir(file) { + res << os.walk_ext(file, '.vsh') + res << os.walk_ext(file, '.v') + continue + } + if os.exists(file) { + res << file + } + } + return res +} diff --git a/cmd/tools/measure/fmt_speed.v b/cmd/tools/measure/fmt_speed.v index a213854c9b..c2c0e44bb4 100644 --- a/cmd/tools/measure/fmt_speed.v +++ b/cmd/tools/measure/fmt_speed.v @@ -7,29 +7,33 @@ import v.parser import v.errors import v.scanner import term +import file_lists -const skip_tests = os.getenv_opt('SKIP_TESTS') or { '' }.bool() +const skip_tests = os.getenv('SKIP_TESTS').bool() +const fuzzer_mode = os.getenv('VFUZZER').bool() const comments_mode = scanner.CommentsMode.from(os.getenv('SCANNER_MODE')) or { scanner.CommentsMode.parse_comments } fn main() { - dump(comments_mode) - files := os.args#[1..] - if files.len > 0 && files[0].starts_with('@') { - lst_path := files[0].all_after('@') - listed_files := os.read_file(lst_path)!.split('\n') - process_files(listed_files)! - return + if !fuzzer_mode { + dump(comments_mode) } - process_files(files)! + all_files := file_lists.expand_files(os.args#[1..])! + process_files(all_files)! } fn hline() { + if fuzzer_mode { + return + } println('----------------------------------------------------------------------------------------------------------------------------------------------------------') } fn theader() { + if fuzzer_mode { + return + } println(' Time Tokens Bytes Lines Bytes/Token Errors FMT.len') } @@ -71,7 +75,9 @@ fn process_files(files []string) ! 
{ total_lines += ast_file.nr_lines total_errors += p.errors.len total_fmt_len += formatted_content.len - println('${f_us:10}us ${p.scanner.all_tokens.len:10} ${p.scanner.text.len:10} ${ast_file.nr_lines:10} ${(f64(p.scanner.text.len) / p.scanner.all_tokens.len):13.3} ${p.errors.len:10} ${formatted_content.len:8} ${f}') + if !fuzzer_mode { + println('${f_us:10}us ${p.scanner.all_tokens.len:10} ${p.scanner.text.len:10} ${ast_file.nr_lines:10} ${(f64(p.scanner.text.len) / p.scanner.all_tokens.len):13.3} ${p.errors.len:10} ${formatted_content.len:8} ${f}') + } } hline() theader() diff --git a/cmd/tools/measure/parser_speed.v b/cmd/tools/measure/parser_speed.v index 79afde28a1..7165a24e0f 100644 --- a/cmd/tools/measure/parser_speed.v +++ b/cmd/tools/measure/parser_speed.v @@ -6,29 +6,33 @@ import v.parser import v.errors import v.scanner import term +import file_lists -const skip_tests = os.getenv_opt('SKIP_TESTS') or { '' }.bool() +const skip_tests = os.getenv('SKIP_TESTS').bool() +const fuzzer_mode = os.getenv('VFUZZER').bool() const comments_mode = scanner.CommentsMode.from(os.getenv('SCANNER_MODE')) or { scanner.CommentsMode.skip_comments } fn main() { - dump(comments_mode) - files := os.args#[1..] - if files.len > 0 && files[0].starts_with('@') { - lst_path := files[0].all_after('@') - listed_files := os.read_file(lst_path)!.split('\n') - process_files(listed_files)! - return + if !fuzzer_mode { + dump(comments_mode) } - process_files(files)! + all_files := file_lists.expand_files(os.args#[1..])! + process_files(all_files)! } fn hline() { + if fuzzer_mode { + return + } println('---------------------------------------------------------------------------------------------------------------------------------------------------') } fn theader() { + if fuzzer_mode { + return + } println(' Time Tokens Bytes Lines Bytes/Token Errors') } @@ -53,20 +57,25 @@ fn process_files(files []string) ! 
{ if skip_tests && f.ends_with('_test.v') { continue } - total_files++ // do not measure the scanning, but only the parsing: mut p := new_parser(f, comments_mode, table, pref_) - /// + if fuzzer_mode { + p.scanner.max_eofs = 200 + } + sw.restart() ast_file := p.parse() f_us := sw.elapsed().microseconds() - /// + total_us += f_us total_bytes += p.scanner.text.len total_tokens += p.scanner.all_tokens.len total_lines += ast_file.nr_lines total_errors += p.errors.len - println('${f_us:10}us ${p.scanner.all_tokens.len:10} ${p.scanner.text.len:10} ${ast_file.nr_lines:10} ${(f64(p.scanner.text.len) / p.scanner.all_tokens.len):13.3} ${p.errors.len:10} ${f}') + if !fuzzer_mode { + println('${f_us:10}us ${p.scanner.all_tokens.len:10} ${p.scanner.text.len:10} ${ast_file.nr_lines:10} ${(f64(p.scanner.text.len) / p.scanner.all_tokens.len):13.3} ${p.errors.len:10} ${f}') + } + total_files++ } hline() theader() diff --git a/cmd/tools/measure/scanner_speed.v b/cmd/tools/measure/scanner_speed.v index 49621c2757..1b91c17c7f 100644 --- a/cmd/tools/measure/scanner_speed.v +++ b/cmd/tools/measure/scanner_speed.v @@ -2,30 +2,34 @@ import os import time import term import v.scanner +import file_lists import v.pref -const skip_tests = os.getenv_opt('SKIP_TESTS') or { '' }.bool() +const skip_tests = os.getenv('SKIP_TESTS').bool() +const fuzzer_mode = os.getenv('VFUZZER').bool() const comments_mode = scanner.CommentsMode.from(os.getenv('SCANNER_MODE')) or { scanner.CommentsMode.skip_comments } fn main() { - dump(comments_mode) - files := os.args#[1..] - if files.len > 0 && files[0].starts_with('@') { - lst_path := files[0].all_after('@') - listed_files := os.read_file(lst_path)!.split('\n') - process_files(listed_files)! - return + if !fuzzer_mode { + dump(comments_mode) } - process_files(files)! + all_files := file_lists.expand_files(os.args#[1..])! + process_files(all_files)! 
} fn hline() { + if fuzzer_mode { + return + } println('----------------------------------------------------------------------------------------------------------------------------------------------------') } fn theader() { + if fuzzer_mode { + return + } println(' Time Tokens Bytes Lines Bytes/Token Errors') } @@ -58,7 +62,9 @@ fn process_files(files []string) ! { total_tokens += s.all_tokens.len total_lines += s.nr_lines total_errors += s.errors.len - println('${f_us:10}us ${s.all_tokens.len:10} ${s.text.len:10} ${s.nr_lines:10} ${(f64(s.text.len) / s.all_tokens.len):13.3f} ${s.errors.len:10} ${f}') + if !fuzzer_mode { + println('${f_us:10}us ${s.all_tokens.len:10} ${s.text.len:10} ${s.nr_lines:10} ${(f64(s.text.len) / s.all_tokens.len):13.3f} ${s.errors.len:10} ${f}') + } } hline() theader() diff --git a/cmd/tools/modules/testing/common.v b/cmd/tools/modules/testing/common.v index c90fd9e4b7..58a1838780 100644 --- a/cmd/tools/modules/testing/common.v +++ b/cmd/tools/modules/testing/common.v @@ -764,6 +764,7 @@ pub fn prepare_test_session(zargs string, folder string, oskipped []string, main $if windows { // skip process/command examples on windows. 
TODO: remove the need for this, fix os.Command if fnormalised.ends_with('examples/process/command.v') { + skipped << fnormalised.replace(nparent_dir + '/', '') continue } } @@ -771,6 +772,7 @@ pub fn prepare_test_session(zargs string, folder string, oskipped []string, main start := c#[0..testing.header_bytes_to_search_for_module_main] if start.contains('module ') && !start.contains('module main') { skipped << fnormalised.replace(nparent_dir + '/', '') + continue next_file } for skip_prefix in oskipped { skip_folder := skip_prefix + '/' diff --git a/vlib/v/ast/ast.v b/vlib/v/ast/ast.v index 3e5daaba12..5cdc84dfad 100644 --- a/vlib/v/ast/ast.v +++ b/vlib/v/ast/ast.v @@ -2464,6 +2464,9 @@ pub fn (mut lx IndexExpr) recursive_arraymap_set_is_setter() { pub fn all_registers(mut t Table, arch pref.Arch) map[string]ScopeObject { mut res := map[string]ScopeObject{} match arch { + ._auto { + return all_registers(mut t, .amd64) + } .amd64, .i386 { for bit_size, array in ast.x86_no_number_register_list { for name in array { @@ -2522,7 +2525,7 @@ pub fn all_registers(mut t Table, arch pref.Arch) map[string]ScopeObject { // no registers } else { // TODO - panic('all_registers: unhandled arch') + panic('all_registers: unhandled arch: ${arch}') } } diff --git a/vlib/v/parser/assign.v b/vlib/v/parser/assign.v index b5d8584f37..62019b2a63 100644 --- a/vlib/v/parser/assign.v +++ b/vlib/v/parser/assign.v @@ -18,16 +18,12 @@ fn (mut p Parser) assign_stmt() ast.Stmt { return p.partial_assign_stmt(exprs) } -const max_expr_level = 100 - fn (mut p Parser) check_undefined_variables(names []string, val ast.Expr) ! { p.expr_level++ defer { p.expr_level-- } - if p.expr_level > parser.max_expr_level { - return error('expr level > ${parser.max_expr_level}') - } + p.check_expr_level()! 
match val { ast.Ident { for name in names { diff --git a/vlib/v/parser/expr.v b/vlib/v/parser/expr.v index 0d4c40c450..01391e3ac9 100644 --- a/vlib/v/parser/expr.v +++ b/vlib/v/parser/expr.v @@ -6,6 +6,15 @@ module parser import v.ast import v.token +const max_expr_level = 100 + +@[inline] +fn (mut p Parser) check_expr_level() ! { + if p.expr_level > parser.max_expr_level { + return error('expr level > ${parser.max_expr_level}') + } +} + fn (mut p Parser) expr(precedence int) ast.Expr { return p.check_expr(precedence) or { if token.is_decl(p.tok.kind) && p.disallow_declarations_in_script_mode() { @@ -17,6 +26,11 @@ fn (mut p Parser) expr(precedence int) ast.Expr { fn (mut p Parser) check_expr(precedence int) !ast.Expr { p.trace_parser('expr(${precedence})') + p.expr_level++ + defer { + p.expr_level-- + } + p.check_expr_level()! mut node := ast.empty_expr is_stmt_ident := p.is_stmt_ident p.is_stmt_ident = false diff --git a/vlib/v/parser/if_match.v b/vlib/v/parser/if_match.v index 0af2cd7004..58f66c6d5d 100644 --- a/vlib/v/parser/if_match.v +++ b/vlib/v/parser/if_match.v @@ -163,6 +163,10 @@ fn (mut p Parser) if_expr(is_comptime bool) ast.IfExpr { body_pos := p.tok.pos() p.inside_if = false p.inside_comptime_if = false + if p.opened_scopes > p.max_opened_scopes { + p.error('too many nested conditionals, scopes: ${p.opened_scopes}') + return ast.IfExpr{} + } p.open_scope() stmts := p.parse_block_no_scope(false) branches << ast.IfBranch{ diff --git a/vlib/v/parser/parser.v b/vlib/v/parser/parser.v index 05a4e07d34..f728ce1508 100644 --- a/vlib/v/parser/parser.v +++ b/vlib/v/parser/parser.v @@ -106,9 +106,13 @@ mut: script_mode bool script_mode_start_token token.Token pub mut: - scanner &scanner.Scanner = unsafe { nil } - table &ast.Table = unsafe { nil } - scope &ast.Scope = unsafe { nil } + scanner &scanner.Scanner = unsafe { nil } + table &ast.Table = unsafe { nil } + scope &ast.Scope = unsafe { nil } + + opened_scopes int + max_opened_scopes int = 100 // values 
above 300 risk stack overflow + errors []errors.Error warnings []errors.Warning notices []errors.Notice @@ -451,10 +455,7 @@ fn (p &Parser) peek_token(n int) token.Token { fn (p &Parser) peek_token_after_var_list() token.Token { mut n := 0 mut tok := p.tok - for { - if tok.kind == .eof { - break - } + for tok.kind != .eof { if tok.kind == .key_mut { n += 2 } else { @@ -546,10 +547,14 @@ fn (p &Parser) is_array_type() bool { } fn (mut p Parser) open_scope() { + if p.opened_scopes > p.max_opened_scopes { + p.error('nested opened scopes limit reached: ${p.max_opened_scopes}') + } p.scope = &ast.Scope{ parent: p.scope start_pos: p.tok.pos } + p.opened_scopes++ } fn (mut p Parser) close_scope() { @@ -561,6 +566,7 @@ fn (mut p Parser) close_scope() { p.scope.end_pos = p.prev_tok.pos p.scope.parent.children << p.scope p.scope = p.scope.parent + p.opened_scopes-- } fn (mut p Parser) parse_block() []ast.Stmt { @@ -1202,7 +1208,7 @@ fn (mut p Parser) asm_stmt(is_top_level bool) ast.AsmStmt { // x86: https://www.felixcloutier.com/x86/ // arm: https://developer.arm.com/documentation/dui0068/b/arm-instruction-reference mut templates := []ast.AsmTemplate{} - for p.tok.kind !in [.semicolon, .rcbr] { + for p.tok.kind !in [.semicolon, .rcbr, .eof] { template_pos := p.tok.pos() mut name := '' if p.tok.kind == .name && arch == .amd64 && p.tok.lit in ['rex', 'vex', 'xop'] { @@ -1299,7 +1305,7 @@ fn (mut p Parser) asm_stmt(is_top_level bool) ast.AsmStmt { } } else { - verror('p.parse_number_literal() invalid output: `${number_lit}`') + p.error('p.parse_number_literal() invalid output: `${number_lit}`') } } } @@ -1441,7 +1447,7 @@ fn (mut p Parser) reg_or_alias() ast.AsmArg { if x is ast.AsmRegister { return ast.AsmArg(x as ast.AsmRegister) } else { - verror('non-register ast.ScopeObject found in scope') + p.error('non-register ast.ScopeObject found in scope') return ast.AsmDisp{} // should not be reached } } else if p.prev_tok.len >= 2 && p.prev_tok.lit[0] in [`b`, `f`] @@ -1713,6 
+1719,10 @@ fn (mut p Parser) asm_ios(output bool) []ast.AsmIO { return [] } for { + if p.tok.kind == .eof { + p.error('reached eof in asm_ios') + return [] + } pos := p.tok.pos() mut constraint := '' @@ -1747,6 +1757,7 @@ fn (mut p Parser) asm_ios(output bool) []ast.AsmIO { // Numbered constraints - https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html if p.tok.lit.int() >= 10 { p.error_with_pos('The digit must be between 0 and 9 only', pos) + return [] } p.check(.number) } else { @@ -1759,6 +1770,7 @@ fn (mut p Parser) asm_ios(output bool) []ast.AsmIO { expr = expr.expr } else { p.error('asm in/output must be enclosed in brackets') + return [] } mut alias := '' if p.tok.kind == .key_as { @@ -4348,6 +4360,10 @@ fn (mut p Parser) type_decl() ast.TypeDecl { // type SumType = Aaa | Bbb | Ccc if sum_variants.len > 1 { for variant in sum_variants { + if variant.typ == 0 { + // the type symbol is probably coming from another .v file + continue + } variant_sym := p.table.sym(variant.typ) // TODO: implement this check for error too if variant_sym.kind == .none_ { @@ -4394,9 +4410,13 @@ fn (mut p Parser) type_decl() ast.TypeDecl { } // sum_variants will have only one element parent_type := sum_variants[0].typ - parent_sym := p.table.sym(parent_type) pidx := parent_type.idx() - p.check_for_impure_v(parent_sym.language, decl_pos) + mut parent_language := ast.Language.v + if parent_type != 0 { + parent_sym := p.table.sym(parent_type) + parent_language = parent_sym.language + p.check_for_impure_v(parent_sym.language, decl_pos) + } prepend_mod_name := if language == .v { p.prepend_mod(name) } else { name } // `C.time_t`, not `time.C.time_t` idx := p.table.register_sym(ast.TypeSymbol{ kind: .alias @@ -4406,7 +4426,7 @@ fn (mut p Parser) type_decl() ast.TypeDecl { parent_idx: pidx info: ast.Alias{ parent_type: parent_type - language: parent_sym.language + language: parent_language } is_pub: is_pub }) @@ -4474,11 +4494,6 @@ fn (p &Parser) new_true_expr() ast.Expr { } } 
-@[noreturn] -fn verror(s string) { - util.verror('parser error', s) -} - fn (mut p Parser) top_level_statement_start() { if p.scanner.comments_mode == .toplevel_comments { p.scanner.set_is_inside_toplevel_statement(true) diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v index b6c992deea..f0be8bd260 100644 --- a/vlib/v/scanner/scanner.v +++ b/vlib/v/scanner/scanner.v @@ -53,6 +53,7 @@ pub mut: all_tokens []token.Token // *only* used in comments_mode: .toplevel_comments, contains all tokens tidx int eofs int + max_eofs int = 50 inter_cbr_count int pref &pref.Preferences error_details []string @@ -557,7 +558,7 @@ fn (mut s Scanner) skip_whitespace() { fn (mut s Scanner) end_of_file() token.Token { s.eofs++ - if s.eofs > 50 { + if s.eofs > s.max_eofs { s.line_nr-- if s.file_path == scanner.internally_generated_v_code { // show a bit more context for that case, since the source may not be easily visible by just inspecting a source file on the filesystem @@ -566,7 +567,7 @@ fn (mut s Scanner) end_of_file() token.Token { dump(s.text.len) } panic( - 'the end of file `${s.file_path}` has been reached 50 times already, the v parser is probably stuck.\n' + + 'the end of file `${s.file_path}` has been reached ${s.max_eofs} times already, the v parser is probably stuck.\n' + 'This should not happen. 
Please report the bug here, and include the last 2-3 lines of your source code:\n' + 'https://github.com/vlang/v/issues/new?labels=Bug&template=bug_report.md') } diff --git a/vlib/v/util/errors.v b/vlib/v/util/errors.v index 54d9d10e74..0590da1882 100644 --- a/vlib/v/util/errors.v +++ b/vlib/v/util/errors.v @@ -156,7 +156,7 @@ pub fn source_file_context(kind string, filepath string, pos token.Pos) []string aline := mu.max(0, mu.min(source_lines.len - 1, pos.line_nr + util.error_context_after)) tab_spaces := ' ' for iline := bline; iline <= aline; iline++ { - sline := source_lines[iline] + sline := source_lines[iline] or { '' } start_column := mu.max(0, mu.min(pos.col, sline.len)) end_column := mu.max(0, mu.min(pos.col + mu.max(0, pos.len), sline.len)) cline := if iline == pos.line_nr { @@ -179,7 +179,7 @@ pub fn source_file_context(kind string, filepath string, pos token.Pos) []string i++ } else { char_len := utf8_char_len(sline[i]) - spaces := ' '.repeat(utf8_str_visible_length(sline[i..i + char_len])) + spaces := ' '.repeat(utf8_str_visible_length(sline#[i..i + char_len])) pointerline_builder.write_string(spaces) i += char_len }