mirror of
https://gitlab.com/PronounsPage/PronounsPage.git
synced 2025-08-04 03:27:05 -04:00
445 lines
15 KiB
TypeScript
445 lines
15 KiB
TypeScript
/**
 * Error raised when a line of the document cannot be parsed.
 *
 * The message always starts with the 1-based line number and the offending
 * text, optionally followed by a more specific reason.
 */
class ParseError extends Error {
    /** Zero-based index of the line that failed to parse. */
    line: number;
    /** Text of the failing line as seen by the parser. */
    context: string;

    /**
     * @param message optional detail appended after a colon
     * @param line zero-based line index (reported 1-based in the message)
     * @param context text shown between backticks in the message
     */
    constructor(message = '', line = -1, context = '') {
        const parts: string[] = [`Cannot parse line ${line + 1}`];
        if (context !== undefined) {
            parts.push(` near \`${context}\``);
        }
        if (message) {
            parts.push(`: ${message}`);
        }
        super(parts.join(''));
        this.line = line;
        this.context = context;
    }
}
|
|
|
|
/** Layout-only nodes (blank lines and full-line comments) that precede a value and are re-emitted when it is serialized. */
type FormattingNode = EmptyLineNode | CommentNode;
|
|
|
|
/**
 * Marks a blank line in the source so it can be written back out.
 */
export class EmptyLineNode {
    /** A blank line is serialized as a lone newline, whatever the nesting level. */
    toString(): string {
        const newline = '\n';
        return newline;
    }
}
|
|
|
|
/**
 * A `#` comment together with the spacing that precedes the `#`.
 */
export class CommentNode {
    /**
     * @param comment text after the `#` (leading space, if any, included)
     * @param whitespace spaces placed between the indentation and the `#`
     */
    constructor(public comment: string, public whitespace: string = '') { }

    /**
     * Renders the comment as a single line terminated by a newline.
     * @param indent nesting level; each level contributes four spaces
     */
    toString(indent: number = 0) {
        const indentation = ' '.repeat(4 * indent);
        return indentation + this.whitespace + '#' + this.comment + '\n';
    }
}
|
|
|
|
/** Any value node of the document tree: block dicts and lists, single-line (inline) values, and block strings. */
export type Node = DictNode | ListNode | InlineNode | VerbatimStringNode | FoldedStringNode;
|
|
|
|
/**
 * Builds the text that goes in front of an entry: every formatting node
 * rendered at the given level, followed by the indentation for the entry itself.
 */
const initFormatting = (formatting: FormattingNode[], indent: number) => {
    let prefix = '';
    for (const node of formatting) {
        prefix += node.toString(indent);
    }
    return prefix + ' '.repeat(4 * indent);
};
|
|
|
|
/**
 * A block dictionary: one `key: value` entry per line.
 */
export class DictNode {
    /**
     * @param items entries in document order
     * @param formatting blank lines and comments emitted before the first entry
     */
    constructor(public items: DictEntry[], public formatting: FormattingNode[]) { }

    /** Text placed between a parent's key or dash and this node: the dict starts on its own line. */
    get separator() {
        return '\n';
    }

    /**
     * Serializes the leading formatting followed by the entries, one per line.
     * @param indent nesting level forwarded to every formatting node and entry
     */
    toString(indent: number = 0): string {
        const leading = this.formatting.reduce((text, node) => text + node.toString(indent), '');
        const entries: string[] = [];
        for (const entry of this.items) {
            entries.push(entry.toString(indent));
        }
        return leading + entries.join('\n');
    }
}
|
|
|
|
/**
 * One `key: value` line of a block dictionary.
 */
export class DictEntry {
    /**
     * @param key text before the colon
     * @param value node the key maps to
     * @param formatting blank lines and comments emitted before the entry
     * @param comment comment written on the key line itself, if any
     */
    constructor(
        public key: string,
        public value: Node,
        public formatting: FormattingNode[],
        public comment?: CommentNode,
    ) { }

    /**
     * Serializes the entry. The key is followed either by its own comment
     * (which already ends the line) or by the value's separator, and then by
     * the value rendered one level deeper.
     */
    toString(indent: number = 0): string {
        const prefix = initFormatting(this.formatting, indent);
        const afterKey = this.comment ? this.comment.toString() : this.value.separator;
        const renderedValue = this.value.toString(indent + 1);
        return `${prefix}${this.key}:${afterKey}${renderedValue}`;
    }
}
|
|
|
|
/**
 * A block list: one `- value` item per line.
 */
export class ListNode {
    /**
     * @param items list items in document order
     * @param formatting blank lines and comments emitted before the first item
     */
    constructor(public items: ListItem[], public formatting: FormattingNode[]) { }

    /** Text placed between a parent's key or dash and this node: the list starts on its own line. */
    get separator() {
        return '\n';
    }

    /**
     * Serializes the leading formatting followed by the items, one per line.
     * The formatting is rendered at the same level as the items so that
     * comments preceding the first item are not lost on output.
     * @param indent nesting level forwarded to every formatting node and item
     */
    toString(indent: number = 0): string {
        return this.formatting.map((node) => node.toString(indent)).join('') +
            this.items.map((item) => item.toString(indent)).join('\n');
    }
}
|
|
|
|
/**
 * One `- value` line of a block list.
 */
export class ListItem {
    /**
     * @param value node held by the item
     * @param formatting blank lines and comments emitted before the item
     */
    constructor(public value: Node, public formatting: FormattingNode[]) { }

    /**
     * Serializes the item: leading formatting and indentation, the dash, the
     * value's separator, and the value rendered one level deeper.
     */
    toString(indent: number = 0): string {
        const prefix = initFormatting(this.formatting, indent);
        const { value } = this;
        return prefix + '-' + value.separator + value.toString(indent + 1);
    }
}
|
|
|
|
/** Values written on a single line: scalars, `{ … }` dicts and `[ … ]` lists. */
type InlineNode = SingleLineScalarNode | InlineDictNode | InlineListNode;
|
|
|
|
/**
 * A scalar that fits on one line, with an optional trailing comment.
 */
export class SingleLineScalarNode {
    /**
     * @param value the scalar value
     * @param formatting formatting nodes collected before the scalar
     * @param comment trailing comment text without the `#`, if any
     */
    constructor(
        public value: null | boolean | number | string | Date,
        public formatting: FormattingNode[],
        public comment: string | undefined,
    ) { }

    /** Inline values follow their key or dash after a single space. */
    get separator() {
        return ' ';
    }

    /**
     * Source form of the value: `~` for null, a single-quoted string with
     * embedded quotes doubled, and the value's own `toString()` otherwise.
     */
    get representation(): string {
        const { value } = this;
        if (value === null) {
            return '~';
        }
        return typeof value === 'string'
            ? `'${value.replace(/'/g, '\'\'')}'`
            : value.toString();
    }

    /** Serializes the value followed by ` # comment` when a non-empty comment is present. */
    toString(): string {
        const suffix = this.comment ? ` # ${this.comment}` : '';
        return this.representation + suffix;
    }
}
|
|
|
|
/**
 * A list written on one line as `[a, b, c]`.
 */
export class InlineListNode {
    /** @param items elements in document order */
    constructor(public items: Node[]) { }

    /** Inline values follow their key or dash after a single space. */
    get separator() {
        return ' ';
    }

    /** Serializes the elements separated by `, ` inside square brackets. */
    toString(): string {
        const rendered: string[] = [];
        for (const item of this.items) {
            rendered.push(item.toString());
        }
        return '[' + rendered.join(', ') + ']';
    }
}
|
|
|
|
/**
 * A dictionary written on one line as `{ key: value, … }`.
 */
export class InlineDictNode {
    /** @param items entries keyed by name, in insertion order */
    constructor(public items: Map<string, Node>) { }

    /** Inline values follow their key or dash after a single space. */
    get separator() {
        return ' ';
    }

    /** Serializes the entries as `{ k: v, … }`; an empty dictionary becomes `{}`. */
    toString(): string {
        if (this.items.size === 0) {
            return '{}';
        }
        const pairs: string[] = [];
        this.items.forEach((value, key) => {
            pairs.push(`${key}: ${value.toString()}`);
        });
        return `{ ${pairs.join(', ')} }`;
    }
}
|
|
|
|
/**
 * A multi-line string introduced by `|`.
 */
export class VerbatimStringNode {
    /**
     * @param lines content lines with the block indentation already removed
     * @param comment comment written after the `|` marker, if any
     */
    constructor(public lines: string[], public comment?: CommentNode) { }

    /** The `|` marker follows its key or dash after a single space. */
    get separator() {
        return ' ';
    }

    /**
     * Serializes the marker, then its comment (or a bare newline), then the
     * content. Empty lines are written without indentation.
     * @param indent nesting level applied to every non-empty content line
     */
    toString(indent: number = 0): string {
        const pad = ' '.repeat(4 * indent);
        const header = this.comment ? this.comment.toString() : '\n';
        const body = this.lines.map((line) => (line ? pad + line : '')).join('\n');
        return `|${header}${body}`;
    }
}
|
|
|
|
/**
 * A multi-line string introduced by `>`.
 */
export class FoldedStringNode {
    /**
     * @param lines content lines with the block indentation already removed
     * @param comment comment written after the `>` marker, if any
     */
    constructor(public lines: string[], public comment?: CommentNode) { }

    /** The `>` marker follows its key or dash after a single space. */
    get separator() {
        return ' ';
    }

    /**
     * Serializes the marker, then its comment (or a bare newline), then the
     * content. Empty lines are written without indentation.
     * @param indent nesting level applied to every non-empty content line
     */
    toString(indent: number = 0): string {
        const pad = ' '.repeat(4 * indent);
        const header = this.comment ? this.comment.toString() : '\n';
        const body = this.lines.map((line) => (line ? pad + line : '')).join('\n');
        return `>${header}${body}`;
    }
}
|
|
|
|
/**
 * Parses a whole document into its node tree.
 *
 * `\r\n` and lone `\r` line endings are treated exactly like `\n`.
 *
 * @param value full text of the document
 * @returns the root node
 */
export const parse = (value: string): Node => {
    const lines = value.split(/\r\n|\r|\n/);
    return new Parser().parseLines(lines);
};
|
|
|
|
/** A line holding only a comment: optional leading spaces, `#`, then the comment text. */
const regexLineComment = /^(?<whitespace> *)#(?<comment>.*?)$/;
/** Optional trailing ` # comment`. Appended to the value patterns below; it already anchors to the end of the input. */
const regexPartComment = /(?: *#(?<comment>.*?))?$/;
const regexPartDate = '(?:\\d\\d\\d\\d-\\d\\d-\\d\\d)';
const regexPartTime = '(?:\\d\\d:\\d\\d:\\d\\d(?:[+-]\\d\\d\\d\\d)?)';
/** A dictionary key (no colon, hash, quote, space or brace) followed by its colon. */
const regexPartDictKey = '([^:#\' {}]+):';
/** One element of an inline list or dict: bare text or a single-quoted string, with an optional trailing comma. */
const regexPartInlineElement = '((?:[^,\']+)|(?:\'[^\']*\'))\\s*,?';

const regexNull = new RegExp(`^~${regexPartComment.source}`);
const regexIntDec = new RegExp(`^([+-]?[0-9]+)${regexPartComment.source}`);
const regexIntBin = new RegExp(`^([+-])?0b([0-1]+)${regexPartComment.source}`);
const regexIntOct = new RegExp(`^([+-])?0o([0-7]+)${regexPartComment.source}`);
// Hexadecimal digits run through F/f.
const regexIntHex = new RegExp(`^([+-])?0x([0-9A-Fa-f]+)${regexPartComment.source}`);
const regexInf = new RegExp(`^([+-])?inf${regexPartComment.source}`);
const regexFloat = new RegExp(`^([+-]?[0-9]*\\.[0-9]*([Ee][+-][0-9]+)?)${regexPartComment.source}`);
// The literal is mandatory: an empty or comment-only value is not a boolean.
const regexBool = new RegExp(`^(true|false)${regexPartComment.source}`);
const regexDatetime = new RegExp(
    `^(${regexPartDate}|${regexPartTime}|${regexPartDate} ${regexPartTime}|(?:@\\d+))${regexPartComment.source}`,
);
/** A single-quoted string; a quote inside the string is written as two quotes. */
const regexStringInline = new RegExp(`^'((?:[^']|'')*)'${regexPartComment.source}`);
/** The marker line of a block string: `|` or `>`. */
const regexStringBlock = new RegExp(`^([>|])${regexPartComment.source}`);
/** A block dictionary entry: the key, then either nothing or a space and the rest of the line. */
const regexDict = new RegExp(`^${regexPartDictKey}( .*?|)$`);
const regexListInline = new RegExp(`^\\[(.*)\\]${regexPartComment.source}`);
const regexDictInline = new RegExp(`^\\{(.*)\\}${regexPartComment.source}`);
/** A block list item: a dash, then either nothing or a space and the rest of the line. */
const regexList = new RegExp('^-( .*?|)$');
|
|
|
|
/**
 * Line-oriented recursive parser.
 *
 * Nesting is expressed by indentation in steps of four spaces. `parse()`
 * handles the lines of one indentation level and `parseBlock()` descends into
 * the next level. Blank lines and full-line comments are collected in
 * `formatting` and handed to the next node that is created.
 */
class Parser {
    /** Matches the next element of an inline list; built once instead of once per element. */
    private static readonly regexInlineListItem = new RegExp(`^${regexPartInlineElement}`);

    /** Matches the next `key: element` pair of an inline dict; built once instead of once per pair. */
    private static readonly regexInlineDictItem = new RegExp(`^${regexPartDictKey} *${regexPartInlineElement}`);

    lines: string[] = [];
    currentLineNumber: number = 0;
    currentIndent: number = 0;
    /** Blank lines and comments seen since the last node was created. */
    formatting: FormattingNode[] = [];

    /**
     * Whether the current line exists and belongs to the block being parsed.
     *
     * A blank line belongs to the block unless it is the very last line (the
     * remainder after a trailing newline). Any other line, the last one
     * included, must carry at least the block's indentation; otherwise it
     * belongs to an enclosing block and must be left for that block to handle.
     */
    private get isCurrentLineValid(): boolean {
        if (this.currentLineNumber >= this.lines.length) {
            return false;
        }
        const line = this.lines[this.currentLineNumber];
        if (line.trim().length === 0) {
            return this.currentLineNumber < this.lines.length - 1;
        }
        return line.startsWith(' '.repeat(4 * this.currentIndent));
    }

    /** The current line with the indentation of the current block removed. */
    private get currentLine() {
        return this.lines[this.currentLineNumber].substring(4 * this.currentIndent);
    }

    /** Creates a `ParseError` that points at the current line. */
    private createError(message: string): ParseError {
        return new ParseError(message, this.currentLineNumber, this.currentLine);
    }

    /** Returns the pending formatting nodes and starts a fresh collection. */
    private getAndClearFormatting(): FormattingNode[] {
        const formatting = this.formatting;
        this.formatting = [];
        return formatting;
    }

    /**
     * Resets the parser state and parses the given lines.
     * @param lines document split into lines, without line terminators
     * @returns the root node
     * @throws ParseError when a line cannot be parsed
     * @throws Error when no node could be produced or an unsupported scalar form is met
     */
    parseLines(lines: string[]): Node {
        this.lines = lines;
        this.currentLineNumber = 0;
        this.currentIndent = 0;
        this.formatting = [];
        return this.parse();
    }

    /**
     * Parses consecutive lines of the current indentation level into one node:
     * a dict, a list, a block string or a single inline value.
     */
    private parse(): Node {
        let node: Node | undefined = undefined;

        while (this.isCurrentLineValid) {
            const line = this.currentLine;

            if (line.trim().length === 0) {
                this.formatting.push(new EmptyLineNode());
                this.currentLineNumber++;
                continue;
            }

            const commentMatch = line.match(regexLineComment);
            if (commentMatch) {
                this.formatting.push(new CommentNode(
                    commentMatch.groups!.comment,
                    commentMatch.groups!.whitespace ?? '',
                ));
                this.currentLineNumber++;
                continue;
            }

            const dictMatch = line.match(regexDict);
            if (dictMatch) {
                if (node === undefined) {
                    node = new DictNode([], this.getAndClearFormatting());
                } else if (!(node instanceof DictNode)) {
                    throw this.createError('Dict in wrong context');
                }
                const formatting = this.getAndClearFormatting();
                // parseBlock advances past the key line (and past any nested block).
                const value = this.parseBlock(dictMatch[2].trim());
                // Inline values and block strings carry their own trailing comment;
                // for nested dicts and lists the comment on the key line is kept on the entry.
                const comment = value instanceof DictNode || value instanceof ListNode
                    ? this.extractComment(dictMatch[0])
                    : undefined;
                node.items.push(new DictEntry(dictMatch[1].trim(), value, formatting, comment));
                continue;
            }

            const listMatch = line.match(regexList);
            if (listMatch) {
                if (node === undefined) {
                    node = new ListNode([], this.getAndClearFormatting());
                } else if (!(node instanceof ListNode)) {
                    throw this.createError('List in wrong context');
                }
                const formatting = this.getAndClearFormatting();
                node.items.push(new ListItem(this.parseBlock(listMatch[1].trim()), formatting));
                continue;
            }

            if (regexStringBlock.test(line)) {
                // A block string cannot follow dict entries or list items of the same block.
                if (node !== undefined) {
                    throw this.createError('Scalar in wrong context');
                }
                // Pass the whole marker line so that a trailing comment is preserved.
                return this.parseBlock(line);
            }

            if (node !== undefined) {
                throw this.createError('Scalar in wrong context');
            }

            node = this.parseInline(line);
            this.currentLineNumber++;
        }

        if (node === undefined) {
            throw new Error('undefined state');
        }
        return node;
    }

    /**
     * Parses a value that is written on a single line.
     *
     * Patterns are tried in a fixed order: null, decimal integer, binary,
     * octal and hexadecimal integers, infinity, float, boolean, date/time,
     * quoted string, inline list, inline dict. Binary, octal, hexadecimal,
     * infinity, float and date/time values are recognized but not supported
     * and raise a plain `Error`.
     *
     * @throws ParseError when no pattern matches
     */
    private parseInline(value: string): InlineNode {
        const formatting = this.getAndClearFormatting();

        let match;
        match = value.match(regexNull);
        if (match) {
            return new SingleLineScalarNode(null, formatting, match.groups?.comment?.trim());
        }
        match = value.match(regexIntDec);
        if (match) {
            return new SingleLineScalarNode(parseInt(match[1]), formatting, match.groups?.comment?.trim());
        }
        if (regexIntBin.test(value)) {
            throw new Error('unimplemented int bin');
        }
        if (regexIntOct.test(value)) {
            throw new Error('unimplemented int oct');
        }
        if (regexIntHex.test(value)) {
            throw new Error('unimplemented int hex');
        }
        if (regexInf.test(value)) {
            throw new Error('unimplemented inf');
        }
        if (regexFloat.test(value)) {
            throw new Error('unimplemented float');
        }
        match = value.match(regexBool);
        if (match) {
            return new SingleLineScalarNode(match[1] === 'true', formatting, match.groups?.comment?.trim());
        }
        if (regexDatetime.test(value)) {
            throw new Error('unimplemented date');
        }
        match = value.match(regexStringInline);
        if (match) {
            return new SingleLineScalarNode(
                match[1].replaceAll('\'\'', '\''),
                formatting,
                match.groups?.comment?.trim(),
            );
        }
        match = value.match(regexListInline);
        if (match) {
            const items: InlineNode[] = [];
            let rest = match[1].trim();
            // Consume one element (and its optional comma) at a time.
            while (rest.length > 0) {
                const itemMatch = rest.match(Parser.regexInlineListItem);
                if (!itemMatch) {
                    throw this.createError('');
                }
                items.push(this.parseInline(itemMatch[1]));
                rest = rest.substring(itemMatch[0].length).trim();
            }
            return new InlineListNode(items);
        }
        match = value.match(regexDictInline);
        if (match) {
            const entries = new Map<string, Node>();
            let rest = match[1].trim();
            // Consume one `key: element` pair (and its optional comma) at a time.
            while (rest.length > 0) {
                const itemMatch = rest.match(Parser.regexInlineDictItem);
                if (!itemMatch) {
                    throw this.createError('');
                }
                entries.set(itemMatch[1], this.parseInline(itemMatch[2]));
                rest = rest.substring(itemMatch[0].length).trim();
            }
            return new InlineDictNode(entries);
        }

        throw this.createError('');
    }

    /**
     * Parses the value that follows a dict key, a list dash or a block-string marker.
     *
     * The first non-space character of `value` selects the form:
     * nothing or `#` starts a nested block on the following lines, `|` and `>`
     * start a block string, anything else is an inline value on the same line.
     * In every case the current line is consumed.
     */
    private parseBlock(value: string): Node {
        const blockCharacter = value.trim().substring(0, 1);
        switch (blockCharacter) {
            case '':
            case '#': {
                this.currentIndent++;
                this.currentLineNumber++;
                const block = this.parse();
                // Comments left over at the end of the nested block will be emitted at the
                // outer level, so their own spacing absorbs the indentation step being dropped.
                for (const formattingNode of this.formatting) {
                    if (formattingNode instanceof CommentNode) {
                        formattingNode.whitespace += ' '.repeat(4);
                    }
                }
                this.currentIndent--;
                return block;
            }
            case '|':
            case '>': {
                this.currentIndent++;
                this.currentLineNumber++;
                const lines: string[] = [];
                while (this.isCurrentLineValid) {
                    lines.push(this.currentLine);
                    this.currentLineNumber++;
                }
                this.currentIndent--;
                const comment = this.extractComment(value);
                if (blockCharacter === '|') {
                    return new VerbatimStringNode(lines, comment);
                } else {
                    return new FoldedStringNode(lines, comment);
                }
            }
            default: {
                const node = this.parseInline(value);
                this.currentLineNumber++;
                return node;
            }
        }
    }

    /**
     * Extracts a trailing `# comment` from the given text.
     * @returns a comment node preceded by one space, or `undefined` when the
     *          text has no comment or the comment is empty
     */
    private extractComment(value: string): CommentNode | undefined {
        const match = value.match(regexPartComment);
        return match?.groups?.comment
            ? new CommentNode(match.groups.comment, ' ')
            : undefined;
    }
}
|