2025-06-23 16:02:51 +02:00

445 lines
15 KiB
TypeScript

/**
 * Error raised when a line of the input document cannot be parsed.
 *
 * The message is assembled as `Cannot parse line N near \`context\`: message`;
 * the "near" part and the trailing message are only added when non-empty.
 */
class ParseError extends Error {
    /** Zero-based index of the offending line, as passed to the constructor. */
    line: number;
    /** Source text near the failure, or an empty string when none was given. */
    context: string;

    /**
     * @param message Optional description of what went wrong.
     * @param line Zero-based line index; it is reported one-based in the message.
     * @param context Source text near the failure.
     */
    constructor(message = '', line = -1, context = '') {
        let combinedMessage = `Cannot parse line ${line + 1}`;
        // `context` defaults to '', so a `!== undefined` test is always true and
        // would append a dangling "near ``" for errors without any context.
        if (context) {
            combinedMessage += ` near \`${context}\``;
        }
        if (message) {
            combinedMessage += `: ${message}`;
        }
        super(combinedMessage);
        // Make the error identifiable in logs and stack traces.
        this.name = 'ParseError';
        this.line = line;
        this.context = context;
    }
}
/** Layout-only nodes: a blank line or a full-line comment. */
type FormattingNode = EmptyLineNode | CommentNode;
/** Formatting node standing for one blank line of the document. */
export class EmptyLineNode {
    /** Serialises the blank line as a bare line break; no indentation is emitted. */
    toString(): string {
        const lineBreak = '\n';
        return lineBreak;
    }
}
/**
 * A `#` comment.
 *
 * `comment` is the text after the `#`; `whitespace` is extra padding written
 * between the indentation and the `#`.
 */
export class CommentNode {
    constructor(public comment: string, public whitespace: string = '') { }

    /**
     * Renders the comment followed by a line break.
     *
     * @param indent Nesting depth; each level contributes four spaces.
     */
    toString(indent: number = 0) {
        const indentation = ' '.repeat(4 * indent);
        return [indentation, this.whitespace, '#', this.comment, '\n'].join('');
    }
}
/** Any value node of the document tree: block dict, block list, inline value, or multi-line string. */
export type Node = DictNode | ListNode | InlineNode | VerbatimStringNode | FoldedStringNode;
/**
 * Renders the formatting nodes that precede an entry, followed by the
 * indentation (four spaces per level) for the entry's own line.
 *
 * @param formatting Formatting nodes to emit before the entry.
 * @param indent Nesting depth of the entry.
 */
const initFormatting = (formatting: FormattingNode[], indent: number) => {
    let rendered = '';
    for (const node of formatting) {
        rendered += node.toString(indent);
    }
    return rendered + ' '.repeat(4 * indent);
};
/** Block-style dictionary: a sequence of `key: value` entries, one per line. */
export class DictNode {
    constructor(public items: DictEntry[], public formatting: FormattingNode[]) { }

    /** Text placed between a parent's `key:` / `-` marker and this node: the dict starts on a new line. */
    get separator() {
        return '\n';
    }

    /**
     * Renders the dict's own formatting nodes followed by its entries,
     * joined by line breaks.
     *
     * @param indent Nesting depth passed on to every formatting node and entry.
     */
    toString(indent: number = 0): string {
        const leading = this.formatting.map((node) => node.toString(indent));
        const entries = this.items.map((entry) => entry.toString(indent));
        return `${leading.join('')}${entries.join('\n')}`;
    }
}
/**
 * One `key: value` entry of a block dictionary, with the formatting nodes
 * that precede it and an optional comment on the key line.
 */
export class DictEntry {
    constructor(
        public key: string,
        public value: Node,
        public formatting: FormattingNode[],
        public comment?: CommentNode,
    ) { }

    /**
     * Renders the entry: preceding formatting and indentation, `key:`, then
     * either the key-line comment or the value's separator, then the value
     * rendered one level deeper.
     *
     * @param indent Nesting depth of the entry.
     */
    toString(indent: number = 0): string {
        const prefix = initFormatting(this.formatting, indent);
        // A key-line comment takes the place of the separator.
        const afterColon = this.comment ? this.comment.toString() : this.value.separator;
        const body = this.value.toString(indent + 1);
        return prefix + this.key + ':' + afterColon + body;
    }
}
/** Block-style list: a sequence of `- value` items, one per line. */
export class ListNode {
    constructor(public items: ListItem[], public formatting: FormattingNode[]) { }

    /** Text placed between a parent's `key:` / `-` marker and this node: the list starts on a new line. */
    get separator() {
        return '\n';
    }

    /**
     * Renders the list's own formatting nodes followed by its items, joined
     * by line breaks.
     *
     * The formatting nodes are emitted first, the same way `DictNode` does it;
     * leaving them out drops the comments and blank lines attached to the list.
     *
     * @param indent Nesting depth passed on to every formatting node and item.
     */
    toString(indent: number = 0): string {
        const leading = this.formatting.map((node) => node.toString(indent)).join('');
        const body = this.items.map((item) => item.toString(indent)).join('\n');
        return leading + body;
    }
}
/** One `- value` item of a block list, with the formatting nodes that precede it. */
export class ListItem {
    constructor(public value: Node, public formatting: FormattingNode[]) { }

    /**
     * Renders the item: preceding formatting and indentation, the `-` marker,
     * the value's separator, then the value rendered one level deeper.
     *
     * @param indent Nesting depth of the item.
     */
    toString(indent: number = 0): string {
        const prefix = initFormatting(this.formatting, indent);
        const separator = this.value.separator;
        const body = this.value.toString(indent + 1);
        return prefix + '-' + separator + body;
    }
}
/** Values written on a single line: a scalar, an inline `{ ... }` dict or an inline `[ ... ]` list. */
type InlineNode = SingleLineScalarNode | InlineDictNode | InlineListNode;
/** A scalar value written on one line, optionally followed by a `# comment`. */
export class SingleLineScalarNode {
    constructor(
        public value: null | boolean | number | string | Date,
        public formatting: FormattingNode[],
        public comment: string | undefined,
    ) { }

    /** Text placed between a parent's `key:` / `-` marker and this value. */
    get separator() {
        return ' ';
    }

    /**
     * Textual form of the value: `~` for null, a single-quoted string with
     * embedded quotes doubled for strings, and the value's own `toString()`
     * for everything else.
     */
    get representation(): string {
        const { value } = this;
        if (value === null) {
            return '~';
        }
        if (typeof value === 'string') {
            const escaped = value.split('\'').join('\'\'');
            return '\'' + escaped + '\'';
        }
        return value.toString();
    }

    /** Renders the value, appending ` # comment` when a non-empty comment is set. */
    toString(): string {
        const rendered = this.representation;
        const trailingComment = this.comment ? ` # ${this.comment}` : '';
        return rendered + trailingComment;
    }
}
/** A list written inline as `[a, b, c]`. */
export class InlineListNode {
    constructor(public items: Node[]) { }

    /** Text placed between a parent's `key:` / `-` marker and this value. */
    get separator() {
        return ' ';
    }

    /** Renders the items separated by `, ` and wrapped in square brackets. */
    toString(): string {
        const rendered: string[] = [];
        for (const item of this.items) {
            rendered.push(item.toString());
        }
        return '[' + rendered.join(', ') + ']';
    }
}
/** A dictionary written inline as `{ key: value, ... }`. */
export class InlineDictNode {
    constructor(public items: Map<string, Node>) { }

    /** Text placed between a parent's `key:` / `-` marker and this value. */
    get separator() {
        return ' ';
    }

    /**
     * Renders the entries in map order as `{ k: v, k: v }`; an empty map is
     * rendered as `{}` without inner spaces.
     */
    toString(): string {
        if (this.items.size === 0) {
            return '{}';
        }
        const parts: string[] = [];
        this.items.forEach((value, key) => {
            parts.push(`${key}: ${value.toString()}`);
        });
        return `{ ${parts.join(', ')} }`;
    }
}
/** Multi-line string introduced by `|`, with an optional comment on the `|` line. */
export class VerbatimStringNode {
    constructor(public lines: string[], public comment?: CommentNode) { }

    /** Text placed between a parent's `key:` / `-` marker and the `|`. */
    get separator() {
        return ' ';
    }

    /**
     * Renders `|`, then the header comment (or a bare line break), then the
     * content lines joined by line breaks.
     *
     * @param indent Nesting depth; non-empty lines get four spaces per level,
     *     empty lines are written without any indentation.
     */
    toString(indent: number = 0): string {
        const pad = ' '.repeat(4 * indent);
        const header = this.comment ? this.comment.toString() : '\n';
        const body = this.lines.map((line) => (line ? pad + line : '')).join('\n');
        return '|' + header + body;
    }
}
/** Multi-line string introduced by `>`, with an optional comment on the `>` line. */
export class FoldedStringNode {
    constructor(public lines: string[], public comment?: CommentNode) { }

    /** Text placed between a parent's `key:` / `-` marker and the `>`. */
    get separator() {
        return ' ';
    }

    /**
     * Renders `>`, then the header comment (or a bare line break), then the
     * content lines joined by line breaks.
     *
     * @param indent Nesting depth; non-empty lines get four spaces per level,
     *     empty lines are written without any indentation.
     */
    toString(indent: number = 0): string {
        const pad = ' '.repeat(4 * indent);
        const header = this.comment ? this.comment.toString() : '\n';
        const body = this.lines.map((line) => (line ? pad + line : '')).join('\n');
        return '>' + header + body;
    }
}
/**
 * Parses a document into its node tree.
 *
 * Line endings are normalised first: `\r\n`, a lone `\r` and `\n` all end a line.
 *
 * @param value Full text of the document.
 * @returns Root node produced by the parser.
 */
export const parse = (value: string): Node => {
    // `\r\n` is listed first so a Windows line ending counts as one break, not two.
    const lines = value.split(/\r\n|\r|\n/);
    return new Parser().parseLines(lines);
};
// A line that holds nothing but a comment, optionally preceded by spaces.
const regexLineComment = /^(?<whitespace> *)#(?<comment>.*?)$/;
// Optional trailing `# comment`. It ends in `$`, so every pattern that appends
// its source is anchored at the end of the value.
const regexPartComment = /(?: *#(?<comment>.*?))?$/;
// Building blocks shared by the patterns below.
const regexPartDate = '(?:\\d\\d\\d\\d-\\d\\d-\\d\\d)';
const regexPartTime = '(?:\\d\\d:\\d\\d:\\d\\d(?:[+-]\\d\\d\\d\\d)?)';
const regexPartDictKey = '([^:#\' {}]+):';
const regexPartInlineElement = '((?:[^,\']+)|(?:\'[^\']*\'))\\s*,?';
// Scalars.
const regexNull = new RegExp(`^~${regexPartComment.source}$`);
const regexIntDec = new RegExp(`^([+-]?[0-9]+)${regexPartComment.source}$`);
const regexIntBin = new RegExp(`^([+-])?0b([0-1]+)${regexPartComment.source}`);
const regexIntOct = new RegExp(`^([+-])?0o([0-7]+)${regexPartComment.source}`);
// Hex digits run up to F/f; a range ending at E/e rejects values such as `0xFF`.
const regexIntHex = new RegExp(`^([+-])?0x([0-9A-Fa-f]+)${regexPartComment.source}`);
const regexInf = new RegExp(`^([+-])?inf${regexPartComment.source}`);
const regexFloat = new RegExp(`^([+-]?[0-9]*\\.[0-9]*([Ee][+-][0-9]+)?)${regexPartComment.source}`);
// The literal is mandatory: were it optional, an empty or comment-only value
// would match and be read as `false`.
const regexBool = new RegExp(`^(true|false)${regexPartComment.source}`);
const regexDatetime = new RegExp(
    `^(${regexPartDate}|${regexPartTime}|${regexPartDate} ${regexPartTime}|(?:@\\d+))${regexPartComment.source}`,
);
// Strings: a single-quoted inline string (quotes doubled to escape) and the
// `|` / `>` marker that opens a multi-line string.
const regexStringInline = new RegExp(`^'((?:[^']|'')*)'${regexPartComment.source}`);
const regexStringBlock = new RegExp(`^([>|])${regexPartComment.source}`);
// Collections: `key:` / `key: value` lines, inline `[...]` and `{...}`, and
// `-` / `- value` list lines.
const regexDict = new RegExp(`^${regexPartDictKey}( .*?|)$`);
const regexListInline = new RegExp(`^\\[(.*)\\]${regexPartComment.source}`);
const regexDictInline = new RegExp(`^\\{(.*)\\}${regexPartComment.source}`);
const regexList = new RegExp('^-( .*?|)$');
/**
 * Indentation-driven parser that converts the lines of a document into a
 * `Node` tree.
 *
 * One nesting level equals four leading spaces. Blank lines and full-line
 * comments are buffered in `formatting` and handed to the next node that is
 * created.
 */
class Parser {
    /** Matches one element at the head of the remaining inline-list text. Built once rather than per loop iteration. */
    private static readonly regexInlineListItem = new RegExp(`^${regexPartInlineElement}`);
    /** Matches one `key: element` pair at the head of the remaining inline-dict text. */
    private static readonly regexInlineDictItem = new RegExp(`^${regexPartDictKey} *${regexPartInlineElement}`);

    /** All lines of the document being parsed. */
    lines: string[] = [];
    /** Zero-based index of the line currently being looked at. */
    currentLineNumber: number = 0;
    /** Nesting depth of the block currently being parsed. */
    currentIndent: number = 0;
    /** Blank-line and comment nodes read since the last node was created. */
    formatting: FormattingNode[] = [];

    /**
     * Whether the current line exists and belongs to the block at the current
     * nesting depth.
     *
     * A blank line belongs to any block, except a blank final line (the empty
     * element left by a trailing line break), which ends the document. Every
     * other line, including a non-blank final line, must start with the
     * indentation of the current depth. Exempting the final line from that
     * check would let a nested block swallow a dedented last line when the
     * document has no trailing line break.
     */
    private get isCurrentLineValid(): boolean {
        if (this.currentLineNumber >= this.lines.length) {
            return false;
        }
        const line = this.lines[this.currentLineNumber];
        const isBlank = line.trim().length === 0;
        const isLastLine = this.currentLineNumber === this.lines.length - 1;
        if (isLastLine && isBlank) {
            return false;
        }
        return isBlank || line.startsWith(' '.repeat(4 * this.currentIndent));
    }

    /** The current line with the indentation of the current depth cut off. */
    private get currentLine() {
        return this.lines[this.currentLineNumber].substring(4 * this.currentIndent);
    }

    /** Builds a `ParseError` that carries the current line number and text. */
    private createError(message: string): ParseError {
        return new ParseError(message, this.currentLineNumber, this.currentLine);
    }

    /** Returns the buffered formatting nodes and starts a fresh buffer. */
    private getAndClearFormatting(): FormattingNode[] {
        const formatting = this.formatting;
        this.formatting = [];
        return formatting;
    }

    /**
     * Resets the parser state and parses the given lines.
     *
     * @param lines Document split into lines, without line terminators.
     * @returns Root node of the document.
     * @throws ParseError when a line cannot be parsed or appears in the wrong context.
     * @throws Error with message `undefined state` when no node was found at all.
     */
    parseLines(lines: string[]): Node {
        this.lines = lines;
        this.currentLineNumber = 0;
        this.currentIndent = 0;
        this.formatting = [];
        return this.parse();
    }

    /**
     * Parses the block that starts at the current line and ends at the first
     * line not belonging to the current nesting depth.
     */
    private parse(): Node {
        let node: Node | undefined = undefined;
        while (this.isCurrentLineValid) {
            const line = this.currentLine;

            // Blank lines and full-line comments are buffered for the next node.
            if (line.trim().length === 0) {
                this.formatting.push(new EmptyLineNode());
                this.currentLineNumber++;
                continue;
            }
            const commentMatch = line.match(regexLineComment);
            if (commentMatch) {
                this.formatting.push(
                    new CommentNode(commentMatch.groups!.comment, commentMatch.groups!.whitespace ?? ''),
                );
                this.currentLineNumber++;
                continue;
            }

            const dictMatch = line.match(regexDict);
            if (dictMatch) {
                if (node === undefined) {
                    node = new DictNode([], this.getAndClearFormatting());
                }
                if (!(node instanceof DictNode)) {
                    throw this.createError('Dict in wrong context');
                }
                const formatting = this.getAndClearFormatting();
                const value = this.parseBlock(dictMatch[2].trim());
                // Scalars and block strings carry their own comment. A nested
                // dict or list does not, so the comment on the key line is
                // stored on the entry. Lists are included because both start
                // on the following line, where the entry writes the comment.
                const hasNestedBlock = value instanceof DictNode || value instanceof ListNode;
                const comment = hasNestedBlock ? this.extractComment(dictMatch[0]) : undefined;
                node.items.push(new DictEntry(dictMatch[1].trim(), value, formatting, comment));
                continue;
            }

            const listMatch = line.match(regexList);
            if (listMatch) {
                if (node === undefined) {
                    node = new ListNode([], this.getAndClearFormatting());
                } else if (!(node instanceof ListNode)) {
                    throw this.createError('List in wrong context');
                }
                const formatting = this.getAndClearFormatting();
                node.items.push(new ListItem(this.parseBlock(listMatch[1].trim()), formatting));
                continue;
            }

            const stringBlockMatch = line.match(regexStringBlock);
            if (stringBlockMatch) {
                // Returning here with a dict or list in progress would
                // silently discard it, so treat it like any misplaced scalar.
                if (node !== undefined) {
                    throw this.createError('Scalar in wrong context');
                }
                // Pass the whole line so a comment after the marker is kept.
                return this.parseBlock(stringBlockMatch[0]);
            }

            if (node !== undefined) {
                throw this.createError('Scalar in wrong context');
            }
            node = this.parseInline(line);
            this.currentLineNumber++;
        }
        if (node === undefined) {
            throw new Error('undefined state');
        }
        return node;
    }

    /**
     * Parses a value that is written on a single line.
     *
     * The patterns are tried in a fixed order; binary, octal and hexadecimal
     * integers, infinity, floats and dates are recognised but not implemented
     * and raise a plain `Error`.
     *
     * @param value The text of the value, including any trailing comment.
     * @throws ParseError when the text matches none of the known forms.
     */
    private parseInline(value: string): InlineNode {
        const formatting = this.getAndClearFormatting();

        const nullMatch = value.match(regexNull);
        if (nullMatch) {
            return new SingleLineScalarNode(null, formatting, nullMatch.groups?.comment?.trim());
        }
        const intMatch = value.match(regexIntDec);
        if (intMatch) {
            return new SingleLineScalarNode(parseInt(intMatch[1]), formatting, intMatch.groups?.comment?.trim());
        }
        if (value.match(regexIntBin)) {
            throw new Error('unimplemented int bin');
        }
        if (value.match(regexIntOct)) {
            throw new Error('unimplemented int oct');
        }
        if (value.match(regexIntHex)) {
            throw new Error('unimplemented int hex');
        }
        if (value.match(regexInf)) {
            throw new Error('unimplemented inf');
        }
        if (value.match(regexFloat)) {
            throw new Error('unimplemented float');
        }
        const boolMatch = value.match(regexBool);
        if (boolMatch) {
            return new SingleLineScalarNode(boolMatch[1] === 'true', formatting, boolMatch.groups?.comment?.trim());
        }
        if (value.match(regexDatetime)) {
            throw new Error('unimplemented date');
        }
        const stringMatch = value.match(regexStringInline);
        if (stringMatch) {
            return new SingleLineScalarNode(
                // A doubled quote inside the string stands for one quote.
                stringMatch[1].replaceAll('\'\'', '\''),
                formatting,
                stringMatch.groups?.comment?.trim(),
            );
        }

        const inlineListMatch = value.match(regexListInline);
        if (inlineListMatch) {
            const items: InlineNode[] = [];
            let rest = inlineListMatch[1].trim();
            // Consume one element (and its trailing comma) per iteration.
            while (rest.length > 0) {
                const itemMatch = rest.match(Parser.regexInlineListItem);
                if (!itemMatch) {
                    throw this.createError('');
                }
                items.push(this.parseInline(itemMatch[1]));
                rest = rest.substring(itemMatch[0].length).trim();
            }
            return new InlineListNode(items);
        }

        const inlineDictMatch = value.match(regexDictInline);
        if (inlineDictMatch) {
            const entries = new Map<string, Node>();
            let rest = inlineDictMatch[1].trim();
            // Consume one `key: element` pair (and its trailing comma) per iteration.
            while (rest.length > 0) {
                const itemMatch = rest.match(Parser.regexInlineDictItem);
                if (!itemMatch) {
                    throw this.createError('');
                }
                entries.set(itemMatch[1], this.parseInline(itemMatch[2]));
                rest = rest.substring(itemMatch[0].length).trim();
            }
            return new InlineDictNode(entries);
        }

        throw this.createError('');
    }

    /**
     * Parses the value that follows a `key:` or `-` marker.
     *
     * The first character of `value` selects the form: nothing or `#` means
     * the value is a nested block on the following lines; `|` and `>` open a
     * multi-line string; anything else is an inline value on the same line.
     *
     * @param value Trimmed text after the marker on the current line.
     */
    private parseBlock(value: string): Node {
        const blockCharacter = value.trim().substring(0, 1);
        switch (blockCharacter) {
            case '':
            case '#': {
                this.currentIndent++;
                this.currentLineNumber++;
                const block = this.parse();
                // Comments still buffered were read relative to the deeper
                // level; pad them by one level, presumably so they keep their
                // column once attached to a node at the outer level.
                for (const formattingNode of this.formatting) {
                    if (formattingNode instanceof CommentNode) {
                        formattingNode.whitespace += ' '.repeat(4);
                    }
                }
                this.currentIndent--;
                return block;
            }
            case '|':
            case '>': {
                this.currentIndent++;
                this.currentLineNumber++;
                // Every following line of the deeper level is content, taken as is.
                const lines: string[] = [];
                while (this.isCurrentLineValid) {
                    lines.push(this.currentLine);
                    this.currentLineNumber++;
                }
                this.currentIndent--;
                const comment = this.extractComment(value);
                if (blockCharacter === '|') {
                    return new VerbatimStringNode(lines, comment);
                } else {
                    return new FoldedStringNode(lines, comment);
                }
            }
            default: {
                const node = this.parseInline(value);
                this.currentLineNumber++;
                return node;
            }
        }
    }

    /**
     * Extracts a trailing `# comment` from the given text.
     *
     * @returns A `CommentNode` with one space of leading whitespace, or
     *     `undefined` when there is no comment or its text is empty.
     */
    private extractComment(value: string): CommentNode | undefined {
        const match = value.match(regexPartComment);
        return match?.groups?.comment
            ? new CommentNode(match.groups.comment, ' ')
            : undefined;
    }
}