vdoc: refactor html_highlight and handle strings with string interpolations (eg. ${a}.${b}.${c}) (#19784)

This commit is contained in:
Ned 2023-11-28 21:37:10 +08:00 committed by GitHub
parent 52f40aa6e1
commit 782bf86555
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 182 additions and 96 deletions

View File

@ -37,6 +37,11 @@ enum HighlightTokenTyp {
operator operator
punctuation punctuation
string string
// For string interpolation
opening_string
string_interp
partial_string
closing_string
symbol symbol
none_ none_
module_ module_
@ -330,87 +335,166 @@ fn get_src_link(repo_url string, file_name string, line_nr int) string {
return url.str() return url.str()
} }
// write_token appends the plain-text form of `tok` to `buf`, restoring the
// delimiters (quotes, backticks, `//`) that the scanner strips away from the
// token literal, based on the highlight category `typ`.
fn write_token(tok token.Token, typ HighlightTokenTyp, mut buf strings.Builder) {
	if typ == .string_interp {
		// tok.kind.str() for this returns $2 instead of $,
		// so emit the dollar sign directly.
		buf.write_byte(`$`)
		return
	}
	lit := match typ {
		.unone, .operator, .punctuation {
			tok.kind.str()
		}
		.opening_string {
			"'${tok.lit}"
		}
		.closing_string {
			// A string as the next token of the expression
			// inside the string interpolation indicates that
			// this is the closing of string interpolation.
			"${tok.lit}'"
		}
		.string {
			"'${tok.lit}'"
		}
		.char {
			'`${tok.lit}`'
		}
		.comment {
			// A comment literal may carry a leading 0x01 marker byte;
			// skip it so only the comment text follows the `//`.
			if tok.lit != '' && tok.lit[0] == 1 { '//${tok.lit[1..]}' } else { '//${tok.lit}' }
		}
		else {
			tok.lit
		}
	}
	buf.write_string(lit)
}
// html_highlight returns `code` wrapped with HTML syntax-highlighting markup:
// every classified token is emitted inside a `<span class="token ...">` tag,
// while unclassified tokens and whitespace are copied through verbatim.
// The table `tb` is consulted to recognize known type names as builtins.
fn html_highlight(code string, tb &ast.Table) string {
	mut s := scanner.new_scanner(code, .parse_comments, &pref.Preferences{ output_mode: .silent })
	mut tok := s.scan()
	mut next_tok := s.scan()
	mut buf := strings.new_builder(200)
	mut i := 0
	// true after a `$` of `${...}` is scanned, until the string part
	// that follows the interpolated expression is handled below
	mut inside_string_interp := false
	for i < code.len {
		if i != tok.pos {
			// All characters not detected by the scanner
			// (mostly whitespaces) go here.
			buf.write_u8(code[i])
			i++
			continue
		}
		mut tok_typ := HighlightTokenTyp.unone
		match tok.kind {
			.name {
				if tok.lit in highlight_builtin_types || tb.known_type(tok.lit) {
					tok_typ = .builtin
				} else if next_tok.kind == .lcbr {
					tok_typ = .symbol
				} else if next_tok.kind == .lpar || (!tok.lit[0].is_capital()
					&& next_tok.kind == .lt && next_tok.pos == tok.pos + tok.lit.len) {
					tok_typ = .function
				} else {
					tok_typ = .name
				}
			}
			.comment {
				tok_typ = .comment
			}
			.chartoken {
				tok_typ = .char
			}
			.str_dollar {
				tok_typ = .string_interp
				inside_string_interp = true
			}
			.string {
				if inside_string_interp {
					if next_tok.kind == .str_dollar {
						// the " hello " in "${a} hello ${b} world"
						tok_typ = .partial_string
					} else {
						// the " world" in "${a} hello ${b} world"
						tok_typ = .closing_string
					}
					// NOTE: Do not switch inside_string_interp yet!
					// It will be handy later when we do some special
					// handling in generating code (see code below)
				} else if next_tok.kind == .str_dollar {
					tok_typ = .opening_string
				} else {
					tok_typ = .string
				}
			}
			.number {
				tok_typ = .number
			}
			.key_true, .key_false {
				tok_typ = .boolean
			}
			.lpar, .lcbr, .rpar, .rcbr, .lsbr, .rsbr, .semicolon, .colon, .comma, .dot, .dotdot,
			.ellipsis {
				tok_typ = .punctuation
			}
			else {
				if token.is_key(tok.lit) || token.is_decl(tok.kind) {
					tok_typ = .keyword
				} else if tok.kind == .decl_assign || tok.kind.is_assign() || tok.is_unary()
					|| tok.kind.is_relational() || tok.kind.is_infix() || tok.kind.is_postfix() {
					tok_typ = .operator
				}
			}
		}
		if tok_typ in [.unone, .name] {
			// unclassified/plain tokens are written without a wrapping <span>
			write_token(tok, tok_typ, mut buf)
		} else {
			// Special handling for "complex" string literals
			if tok_typ in [.partial_string, .closing_string] && inside_string_interp {
				// rcbr is not rendered when the string on the right
				// side of the expr/string interpolation is not empty.
				// e.g. "${a}.${b}${c}"
				// expectation: "${a}.${b}${c}"
				// reality: "${a.${b}${c}"
				if tok.lit.len != 0 {
					write_token(token.Token{ kind: .rcbr }, .unone, mut buf)
				}
				inside_string_interp = false
			}
			// Properly treat and highlight the "string"-related types
			// as if they are "string" type.
			final_tok_typ := match tok_typ {
				.opening_string, .partial_string, .closing_string { HighlightTokenTyp.string }
				else { tok_typ }
			}
			buf.write_string('<span class="token ${final_tok_typ}">')
			write_token(tok, tok_typ, mut buf)
			buf.write_string('</span>')
		}
		if next_tok.kind == .eof {
			break
		}
		i = tok.pos + tok.len
		// This avoids skipping any "unused" tokens.
		// For example: a call expr with complex string literals as args.
		if i - 1 == next_tok.pos {
			i--
		}
		tok = next_tok
		next_tok = s.scan()
	}
	return buf.str()
}

View File

@ -28,28 +28,27 @@ secret <span class="token operator">:=</span> <span class="token string">'your-2
token <span class="token operator">:=</span> <span class="token function">make_token</span><span class="token punctuation">(</span>secret<span class="token punctuation">)</span> token <span class="token operator">:=</span> <span class="token function">make_token</span><span class="token punctuation">(</span>secret<span class="token punctuation">)</span>
ok <span class="token operator">:=</span> <span class="token function">auth_verify</span><span class="token punctuation">(</span>secret<span class="token punctuation">,</span> token<span class="token punctuation">)</span> ok <span class="token operator">:=</span> <span class="token function">auth_verify</span><span class="token punctuation">(</span>secret<span class="token punctuation">,</span> token<span class="token punctuation">)</span>
dt <span class="token operator">:=</span> sw<span class="token punctuation">.</span><span class="token function">elapsed</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">.</span><span class="token function">microseconds</span><span class="token punctuation">(</span><span class="token punctuation">)</span> dt <span class="token operator">:=</span> sw<span class="token punctuation">.</span><span class="token function">elapsed</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">.</span><span class="token function">microseconds</span><span class="token punctuation">(</span><span class="token punctuation">)</span>
<span class="token function">println</span><span class="token punctuation">(</span>'token: ${token}') <span class="token function">println</span><span class="token punctuation">(</span><span class="token string">'token: </span><span class="token string_interp">$</span>{token}<span class="token string">'</span><span class="token punctuation">)</span>
println('auth_verify(secret, token): ${ok}') <span class="token function">println</span><span class="token punctuation">(</span><span class="token string">'auth_verify(secret, token): </span><span class="token string_interp">$</span>{ok}<span class="token string">'</span><span class="token punctuation">)</span>
println('Elapsed time: ${dt} uS') <span class="token function">println</span><span class="token punctuation">(</span><span class="token string">'Elapsed time: </span><span class="token string_interp">$</span>{dt}<span class="token string"> uS'</span><span class="token punctuation">)</span>
} <span class="token punctuation">}</span>
fn make_token(secret string) string { <span class="token keyword">fn</span> <span class="token function">make_token</span><span class="token punctuation">(</span>secret <span class="token builtin">string</span><span class="token punctuation">)</span> <span class="token builtin">string</span> <span class="token punctuation">{</span>
header := base64.url_encode(json.encode(JwtHeader{'HS256', 'JWT'}).bytes()) header <span class="token operator">:=</span> base64<span class="token punctuation">.</span><span class="token function">url_encode</span><span class="token punctuation">(</span>json<span class="token punctuation">.</span><span class="token function">encode</span><span class="token punctuation">(</span><span class="token symbol">JwtHeader</span><span class="token punctuation">{</span><span class="token string">'HS256'</span><span class="token punctuation">,</span> <span class="token string">'JWT'</span><span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">.</span><span class="token function">bytes</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
payload := base64.url_encode(json.encode(JwtPayload{'1234567890', 'John Doe', 1516239022}).bytes()) payload <span class="token operator">:=</span> base64<span class="token punctuation">.</span><span class="token function">url_encode</span><span class="token punctuation">(</span>json<span class="token punctuation">.</span><span class="token function">encode</span><span class="token punctuation">(</span><span class="token symbol">JwtPayload</span><span class="token punctuation">{</span><span class="token string">'1234567890'</span><span class="token punctuation">,</span> <span class="token string">'John Doe'</span><span class="token punctuation">,</span> <span class="token number">1516239022</span><span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">.</span><span class="token function">bytes</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
signature := base64.url_encode(hmac.new(secret.bytes(), '${header}.${payload}'.bytes(), signature <span class="token operator">:=</span> base64<span class="token punctuation">.</span><span class="token function">url_encode</span><span class="token punctuation">(</span>hmac<span class="token punctuation">.</span><span class="token function">new</span><span class="token punctuation">(</span>secret<span class="token punctuation">.</span><span class="token function">bytes</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span><span class="token string">'</span><span class="token string_interp">$</span>{header}<span class="token string">.</span><span class="token string_interp">$</span>{payload}<span class="token string">'</span><span class="token punctuation">.</span><span class="token function">bytes</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
sha256.sum, sha256.block_size)) sha256<span class="token punctuation">.</span>sum<span class="token punctuation">,</span> sha256<span class="token punctuation">.</span>block_size<span class="token punctuation">)</span><span class="token punctuation">)</span>
jwt := '${header}.${payload}.${signature}' jwt <span class="token operator">:=</span><span class="token string">'</span><span class="token string_interp">$</span>{header}<span class="token string">.</span><span class="token string_interp">$</span>{payload}<span class="token string">.</span><span class="token string_interp">$</span>{signature}<span class="token string">'</span>
return jwt <span class="token keyword">return</span> jwt
} <span class="token punctuation">}</span>
fn auth_verify(secret string, token string) bool { <span class="token keyword">fn</span> <span class="token function">auth_verify</span><span class="token punctuation">(</span>secret <span class="token builtin">string</span><span class="token punctuation">,</span> token <span class="token builtin">string</span><span class="token punctuation">)</span> <span class="token builtin">bool</span> <span class="token punctuation">{</span>
token_split := token.split('.') token_split <span class="token operator">:=</span> token<span class="token punctuation">.</span><span class="token function">split</span><span class="token punctuation">(</span><span class="token string">'.'</span><span class="token punctuation">)</span>
signature_mirror := hmac.new(secret.bytes(), '${token_split[0]}.${token_split[1]}'.bytes(), signature_mirror <span class="token operator">:=</span> hmac<span class="token punctuation">.</span><span class="token function">new</span><span class="token punctuation">(</span>secret<span class="token punctuation">.</span><span class="token function">bytes</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span><span class="token string">'</span><span class="token string_interp">$</span>{token_split<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>}<span class="token string">.</span><span class="token string_interp">$</span>{token_split<span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">]</span>}<span class="token string">'</span><span class="token punctuation">.</span><span class="token function">bytes</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
sha256.sum, sha256.block_size) sha256<span class="token punctuation">.</span>sum<span class="token punctuation">,</span> sha256<span class="token punctuation">.</span>block_size<span class="token punctuation">)</span>
signature_from_token := base64.url_decode(token_split[2]) signature_from_token <span class="token operator">:=</span> base64<span class="token punctuation">.</span><span class="token function">url_decode</span><span class="token punctuation">(</span>token_split<span class="token punctuation">[</span><span class="token number">2</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
return hmac.equal(signature_from_token, signature_mirror) <span class="token keyword">return</span> hmac<span class="token punctuation">.</span><span class="token function">equal</span><span class="token punctuation">(</span>signature_from_token<span class="token punctuation">,</span> signature_mirror<span class="token punctuation">)</span>
} <span class="token punctuation">}</span></code></pre>
</code></pre>
</section> </section>

View File

@ -137,9 +137,11 @@ fn gen_footer_text(d &doc.Doc, include_timestamp bool) string {
return '${footer_text} Generated on: ${time_str}' return '${footer_text} Generated on: ${time_str}'
} }
// highlight_builtin_types lists the type names that the highlighters
// (html_highlight and color_highlight) render as builtin types.
const highlight_builtin_types = ['bool', 'string', 'i8', 'i16', 'int', 'i64', 'i128', 'isize',
	'byte', 'u8', 'u16', 'u32', 'u64', 'usize', 'u128', 'rune', 'f32', 'f64', 'byteptr', 'voidptr',
	'any']
fn color_highlight(code string, tb &ast.Table) string { fn color_highlight(code string, tb &ast.Table) string {
builtin := ['bool', 'string', 'i8', 'i16', 'int', 'i64', 'i128', 'isize', 'byte', 'u8', 'u16',
'u32', 'u64', 'usize', 'u128', 'rune', 'f32', 'f64', 'byteptr', 'voidptr', 'any']
highlight_code := fn (tok token.Token, typ HighlightTokenTyp) string { highlight_code := fn (tok token.Token, typ HighlightTokenTyp) string {
mut lit := '' mut lit := ''
match typ { match typ {
@ -207,14 +209,15 @@ fn color_highlight(code string, tb &ast.Table) string {
mut tok_typ := HighlightTokenTyp.unone mut tok_typ := HighlightTokenTyp.unone
match tok.kind { match tok.kind {
.name { .name {
if (tok.lit in builtin || tb.known_type(tok.lit)) if (tok.lit in highlight_builtin_types || tb.known_type(tok.lit))
&& (next_tok.kind != .lpar || prev.kind !in [.key_fn, .rpar]) { && (next_tok.kind != .lpar || prev.kind !in [.key_fn, .rpar]) {
tok_typ = .builtin tok_typ = .builtin
} else if } else if
(next_tok.kind in [.lcbr, .rpar, .eof, .comma, .pipe, .name, .rcbr, .assign, .key_pub, .key_mut, .pipe, .comma, .comment, .lt, .lsbr] (next_tok.kind in [.lcbr, .rpar, .eof, .comma, .pipe, .name, .rcbr, .assign, .key_pub, .key_mut, .pipe, .comma, .comment, .lt, .lsbr]
&& next_tok.lit !in builtin) && next_tok.lit !in highlight_builtin_types)
&& (prev.kind in [.name, .amp, .lcbr, .rsbr, .key_type, .assign, .dot, .not, .question, .rpar, .key_struct, .key_enum, .pipe, .key_interface, .comment, .ellipsis, .comma] && (prev.kind in [.name, .amp, .lcbr, .rsbr, .key_type, .assign, .dot, .not, .question, .rpar, .key_struct, .key_enum, .pipe, .key_interface, .comment, .ellipsis, .comma]
&& prev.lit !in builtin) && ((tok.lit != '' && tok.lit[0].is_capital()) && prev.lit !in highlight_builtin_types)
&& ((tok.lit != '' && tok.lit[0].is_capital())
|| prev_prev.lit in ['C', 'JS']) { || prev_prev.lit in ['C', 'JS']) {
tok_typ = .symbol tok_typ = .symbol
} else if tok.lit[0].is_capital() && prev.kind == .lpar } else if tok.lit[0].is_capital() && prev.kind == .lpar

View File

@ -103,8 +103,8 @@ pub mut:
base_path string base_path string
table &ast.Table = ast.new_table() table &ast.Table = ast.new_table()
checker checker.Checker = checker.Checker{ checker checker.Checker = checker.Checker{
table: 0 table: unsafe { nil }
pref: 0 pref: unsafe { nil }
} }
fmt fmt.Fmt fmt fmt.Fmt
filename string filename string