From 17d65db828f2c66714fbdfeddd9728facb240d8e Mon Sep 17 00:00:00 2001
From: Makhnev Petr <51853996+i582@users.noreply.github.com>
Date: Tue, 24 Jan 2023 23:41:25 +0400
Subject: [PATCH] builtin: add `string.trim_indent()`` method (#17099)

---
 vlib/builtin/string.v                  |  95 +++++++++++++++++++
 vlib/builtin/string_test.v             |  23 +++++
 vlib/builtin/string_trim_indent_test.v | 126 +++++++++++++++++++++++++
 3 files changed, 244 insertions(+)
 create mode 100644 vlib/builtin/string_trim_indent_test.v

diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v
index b2bbe3526d..9c50aa800d 100644
--- a/vlib/builtin/string.v
+++ b/vlib/builtin/string.v
@@ -1951,6 +1951,8 @@ pub fn (s string) fields() []string {
 // Note: the delimiter has to be a byte at this time. That means surrounding
 // the value in ``.
 //
+// See also: string.trim_indent()
+//
 // Example:
 // ```v
 // st := 'Hello there,
@@ -2011,6 +2013,99 @@ pub fn (s string) strip_margin_custom(del u8) string {
 	}
 }
 
+// trim_indent detects a common minimal indent of all the input lines,
+// removes it from every line and also removes the first and the last
+// lines if they are blank (note the difference between blank and empty).
+//
+// Note that blank lines do not affect the detected indent level.
+//
+// If there are non-blank lines with no leading whitespace characters
+// (no indent at all) then the common indent is 0, and therefore this function
+// doesn't change the indentation.
+//
+// Example:
+// ```v
+// st := '
+//     Hello there,
+//     this is a string,
+//     all the leading indents are removed
+//     and also the first and the last lines if they are blank
+// '.trim_indent()
+//
+// assert st == 'Hello there,
+// this is a string,
+// all the leading indents are removed
+// and also the first and the last lines if they are blank'
+// ```
+pub fn (s string) trim_indent() string {
+	mut rows := s.split_into_lines()
+
+	// Smallest indent among the non-blank rows; blank rows are not measured.
+	mut shared_indent := int(2147483647) // max int, kept when every row is blank
+	for row in rows {
+		if row.is_blank() {
+			continue
+		}
+
+		width := row.indent_width()
+		if width < shared_indent {
+			shared_indent = width
+		}
+	}
+
+	// Drop the first row when it is blank.
+	if rows.len > 0 && rows.first().is_blank() {
+		rows = rows[1..]
+	}
+
+	// Drop the last row when it is blank.
+	if rows.len > 0 && rows.last().is_blank() {
+		rows = rows[..rows.len - 1]
+	}
+
+	// Rebuild the text row by row.
+	mut result := []string{cap: rows.len}
+	for row in rows {
+		// Blank rows are kept as they are; only non-blank rows lose the indent.
+		kept := if row.is_blank() { row } else { row[shared_indent..] }
+		result << kept
+	}
+
+	return result.join('\n')
+}
+
+// indent_width returns the number of whitespace bytes at the beginning of the string.
+// A byte counts as whitespace when `is_space()` is true for it.
+// A string without any non-whitespace byte (the empty string included) has a width of 0.
+// Example: assert '  v'.indent_width() == 2
+// Example: assert '\t\tv'.indent_width() == 2
+pub fn (s string) indent_width() int {
+	mut width := 0
+	for width < s.len && s[width].is_space() {
+		width++
+	}
+
+	// Reaching the end means there was no non-whitespace byte: report no indent.
+	return if width == s.len { 0 } else { width }
+}
+
+// is_blank returns true if the string is empty or contains only white-space.
+// A byte counts as white-space when `is_space()` is true for it.
+// Example: assert ''.is_blank()
+// Example: assert ' '.is_blank()
+// Example: assert '\t'.is_blank()
+// Example: assert 'v'.is_blank() == false
+pub fn (s string) is_blank() bool {
+	// An empty string skips the loop entirely, which makes it blank too.
+	for pos in 0 .. s.len {
+		if !s[pos].is_space() {
+			return false
+		}
+	}
+
+	return true
+}
+
 // match_glob matches the string, with a Unix shell-style wildcard pattern.
 // Note: wildcard patterns are NOT the same as regular expressions.
// They are much simpler, and do not allow backtracking, captures, etc.
diff --git a/vlib/builtin/string_test.v b/vlib/builtin/string_test.v
index 9d01d70bcb..904d67aac1 100644
--- a/vlib/builtin/string_test.v
+++ b/vlib/builtin/string_test.v
@@ -1056,3 +1056,26 @@ fn test_string_is_ascii() {
 fn test_string_with_zero_byte_escape() {
 	assert '\x00'.bytes() == [u8(0)]
 }
+
+fn test_is_blank() {
+	assert ''.is_blank()
+	assert ' '.is_blank()
+	assert ' \t'.is_blank()
+	assert ' \t
+
+'.is_blank()
+	assert ' \t\r'.is_blank()
+	assert ' \t\r
+
+'.is_blank()
+}
+
+fn test_indent_width() {
+	assert 'abc'.indent_width() == 0
+	assert ' abc'.indent_width() == 1
+	assert '  abc'.indent_width() == 2
+	assert '\tabc'.indent_width() == 1
+	assert '\t abc'.indent_width() == 2
+	assert '\t\tabc'.indent_width() == 2
+	assert '\t\t abc'.indent_width() == 3
+}
diff --git a/vlib/builtin/string_trim_indent_test.v b/vlib/builtin/string_trim_indent_test.v
new file mode 100644
index 0000000000..850734caef
--- /dev/null
+++ b/vlib/builtin/string_trim_indent_test.v
@@ -0,0 +1,126 @@
+// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+
+fn test_empty_string() {
+	assert ''.trim_indent() == ''
+}
+
+fn test_blank_string() {
+	assert ' \t'.trim_indent() == ''
+}
+
+fn test_multiline_blank_string() {
+	assert '
+    \t
+'.trim_indent() == ''
+}
+
+fn test_zero_indentation() {
+	assert 'abc
+def'.trim_indent() == 'abc\ndef'
+}
+
+fn test_zero_indentation_and_blank_first_and_last_lines() {
+	assert '
+abc
+def
+'.trim_indent() == 'abc\ndef'
+}
+
+fn test_common_case_tabbed() {
+	assert '
+		abc
+		def
+	'.trim_indent() == 'abc\ndef'
+}
+
+fn test_common_case_spaced() {
+	assert '
+        abc
+        def
+    '.trim_indent() == 'abc\ndef'
+}
+
+fn test_common_case_tabbed_with_middle_blank_like() {
+	assert '
+		abc
+
+		def
+	'.trim_indent() == 'abc\n\ndef'
+}
+
+fn test_common_case_tabbed_with_blank_first_line() {
+	assert ' \t
+		abc
+		def
+	'.trim_indent() == 'abc\ndef'
+}
+
+fn test_common_case_tabbed_with_blank_first_and_last_line() {
+	assert ' \t
+		abc
+		def
+	\t '.trim_indent() == 'abc\ndef'
+}
+
+fn test_html() {
+	assert '
+		<!doctype html>
+		<html lang="en">
+		<head>
+		</head>
+		<body>
+			<p>
+				Hello, World!
+			</p>
+		</body>
+		</html>
+	'.trim_indent() == '<!doctype html>
+<html lang="en">
+<head>
+</head>
+<body>
+	<p>
+		Hello, World!
+	</p>
+</body>
+</html>'
+}
+
+fn test_broken_html() {
+	assert '
+		<!doctype html>
+		<html lang="en">
+		<head>
+		</head>
+	<body>
+			<p>
+				Hello, World!
+			</p>
+		</body>
+		</html>
+	'.trim_indent() == '	<!doctype html>
+	<html lang="en">
+	<head>
+	</head>
+<body>
+		<p>
+			Hello, World!
+		</p>
+	</body>
+	</html>'
+}
+
+fn test_doc_example() {
+	st := '
+		Hello there,
+		this is a string,
+		all the leading indents are removed
+		and also the first and the last lines if they are blank
+'.trim_indent()
+	assert st == 'Hello there,
+this is a string,
+all the leading indents are removed
+and also the first and the last lines if they are blank'
+}