import re import os import argparse import sys import pickle import hashlib import difflib # fasm keywords keywords = [ "align", "equ", "org", "while", "load", "store", "times", "repeat", "display", "err", "assert", "if", "aaa", "aad", "aam", "aas", "adc", "add", "addpd", "addps", "addsd", "addss", "addsubpd", "addsubps", "adox", "aesdeclast", "aesenc", "aesenclast", "aesimc", "aeskeygenassist", "and", "andnpd", "andnps", "andpd", "andps", "arpl", "bextr", "blendpd", "blendvpd", "blendvps", "blsi", "blsmsk", "blsr", "bndcl", "bndcn", "bndldx", "bndmk", "bndmov", "bndstx", "bound", "bsf", "bsr", "bswap", "btc", "btr", "bts", "bzhi", "call", "cbw", "cdq", "cdqe", "clac", "clc", "cldemote", "clflush", "clflushopt", "cli", "clts", "clwb", "cmc", "cmova", "cmovb", "cmovbe", "cmovc", "cmove", "cmovg", "cmovge", "cmovl", "cmovle", "cmovnae", "cmovnb", "cmovnbe", "cmovnc", "cmovne", "cmovng", "cmovnge", "cmovnle", "cmovno", "cmovnp", "cmovns", "cmovnz", "cmovo", "cmovp", "cmovpo", "cmovs", "cmovz", "cmp", "cmppd", "cmpps", "cmps", "cmpsb", "cmpsd", "cmpsq", "cmpss", "cmpsw", "cmpxchg", "cmpxchg16b", "cmpxchg8b", "comiss", "cpuid", "cqo", "crc32", "cvtdq2pd", "cvtdq2ps", "cvtpd2dq", "cvtpd2ps", "cvtpi2pd", "cvtpi2ps", "cvtps2dq", "cvtps2pd", "cvtps2pi", "cvtsd2ss", "cvtsi2sd", "cvtsi2ss", "cvtss2sd", "cvtss2si", "cvttpd2dq", "cvttps2dq", "cvttps2pi", "cvttsd2si", "cvttss2si", "cwd", "cwde", "daa", "dec", "div", "divpd", "divps", "divsd", "divss", "dppd", "dpps", "emms", "extractps", "f2xm1", "fabs", "fadd", "faddp", "fbld", "fbstp", "fchs", "fcmova", "fcmovae", "fcmovb", "fcmovbe", "fcmovc", "fcmove", "fcmovg", "fcmovl", "fcmovle", "fcmovna", "fcmovnae", "fcmovnb", "fcmovnbe", "fcmovne", "fcmovng", "fcmovnge", "fcmovnl", "fcmovnle", "fcmovno", "fcmovns", "fcmovnz", "fcmovo", "fcmovp", "fcmovpe", "fcmovpo", "fcmovs", "fcom", "fcomi", "fcomip", "fcomp", "fcompp", "fcos", "fdecstp", "fdiv", "fdivr", "fdivrp", "ffree", "fiadd", "ficom", "ficomp", "fidiv", "fidivr", "fimul", "fincstp", "finit", "fist", "fistp", "fisttp", "fisub", "fisubr", "fld1", "fldcw", "fldenv", "fldl2e", "fldl2t", "fldlg2", "fldln2", "fldpi", "fmul", "fmulp", "fnclex", "fninit", "fnop", "fnsave", "fnstcw", "fnstenv", "fpatan", "fprem", "fprem1", "fptan", "frndint", "frstor", "fsave", "fsin", "fsincos", "fsqrt", "fst", "fstcw", "fstenv", "fstp", "fstsw", "fsubp", "fsubr", "fsubrp", "ftst", "fucom", "fucomi", "fucomip", "fucomp", "fwait", "fxam", "fxch", "fxrstor", "fxsave", "fxtract", "fyl2x", "gf2p8affineinvqb", "gf2p8affineqb", "gf2p8mulb", "haddpd", "haddps", "hsubpd", "hsubps", "idiv", "imul", "in", "inc", "ins", "insb", "insd", "insw", "int", "int1", "int3", "into", "invd", "invlpg", "invpcid", "iret", "jmp", "ja", "jae", "jb", "jbe", "jc", "jcxz", "jecxz", "je", "jg", "jge", "jle", "jna", "jnae", "jnb", "jnbe", "jnc", "jne", "jng", "jnge", "jnl", "jno", "jnp", "jns", "jnz", "jo", "jp", "jpe", "jpo", "js", "jz", "kaddb", "kaddq", "kaddw", "kandb", "kandd", "kandnb", "kandnd", "kandnq", "kandnw", "kandw", "kmovb", "kmovd", "kmovq", "kmovw", "knotb", "knotd", "knotq", "korb", "kord", "korq", "kortestb", "kortestd", "kortestq", "kortestw", "kshiftlb", "kshiftld", "kshiftlq", "kshiftlw", "kshiftrb", "kshiftrd", "kshiftrw", "ktestb", "ktestd", "ktestq", "ktestw", "kunpckbw", "kunpckdq", "kxnorb", "kxnord", "kxnorq", "kxnorw", "kxorb", "kxord", "kxorq", "kxorw", "lar", "lddqu", "ldmxcsr", "lds", "lea", "leave", "les", "lfence", "lfs", "lgs", "lidt", "lldt", "lmsw", "lock", "lods", "lodsb", "lodsd", "lodsq", "loop", "loopa", "loopae", "loopb", "loopbe", "loopc", "loope", "loopg", "loopl", "loople", "loopna", "loopnae", "loopnb", "loopnbe", "loopnc", "loopng", "loopnge", "loopnl", "loopnle", "loopno", "loopnp", "loopns", "loopo", "loopp", "looppe", "looppo", "loops", "loopz", "lsl", "lss", "lzcnt", "maskmovdqu", "maskmovq", "maxpd", "maxps", "maxsd", "maxss", "minpd", "minps", "minsd", "minss", "monitor", "mov", "movapd", "movaps", "movd", "movddup", "movdir64b", "movdiri", "movdq2q", "movdqa", "movdqu", "movhpd", "movhps", "movlhps", "movlpd", "movlps", "movmskpd", "movmskps", "movntdqa", "movnti", "movntpd", "movntps", "movntq", "movq", "movq", "movs", "movsb", "movsd", "movsd", "movshdup", "movsldup", "movsq", "movsw", "movsx", "movsxd", "movupd", "movups", "movzx", "mpsadbw", "mul", "mulps", "mulsd", "mulss", "mulx", "mwait", "neg", "nop", "not", "or", "orps", "out", "outs", "outsb", "outsd", "outsw", "pabsb", "pabsd", "pabsw", "packssdw", "packsswb", "packusdw", "packuswb", "paddb", "paddd", "paddsb", "paddsw", "paddusb", "paddusw", "paddw", "palignr", "pand", "pause", "pavgb", "pavgw", "pblendvb", "pblendw", "pclmulqdq", "pcmpeqb", "pcmpeqq", "pcmpeqw", "pcmpestri", "pcmpestrm", "pcmpgtb", "pcmpgtd", "pcmpgtw", "pcmpistri", "pcmpistrm", "pdep", "pext", "pextrb", "pextrd", "pextrw", "phaddd", "phaddsw", "phaddw", "phminposuw", "phsubd", "phsubsw", "pinsrb", "pinsrd", "pinsrq", "pinsrw", "pmaddubsw", "pmaddwd", "pmaxsb", "pmaxsq", "pmaxsw", "pmaxub", "pmaxud", "pmaxuq", "pmaxuw", "pminsb", "pminsq", "pminsw", "pminub", "pminud", "pminuq", "pminuw", "pmovmskb", "pmovzx", "pmuldq", "pmulhrsw", "pmulhuw", "pmulhw", "pmulld", "pmullq", "pmuludq", "pop", "popa", "popad", "popcnt", "popf", "popfd", "popfq", "prefetchw", "prefetchh", "psadbw", "pshufb", "pshufd", "pshufhw", "pshufw", "psignb", "psignd", "psignw", "pslld", "pslldq", "psllq", "psrad", "psraq", "psraw", "psrld", "psrldq", "psrlq", "psrlw", "psubb", "psubq", "psubsb", "psubsw", "psubusb", "psubusw", "psubw", "ptest", "punpckhbw", "punpckhdq", "punpckhqdq", "punpckhwd", "punpcklbw", "punpcklqdq", "punpcklwd", "push", "pushw", "pushd", "pusha", "pushad", "pushfd", "pushfq", "pxor", "rcl", "rcpps", "rcpss", "rcr", "rdfsbase", "rdmsr", "rdpid", "rdpkru", "rdpmc", "rdrand", "rdseed", "rdtsc", "rdtscp", "repe", "repne", "repnz", "repz", "ret", "rol", "ror", "rorx", "roundpd", "roundsd", "roundss", "rsm", "rsqrtps", "rsqrtss", "sahf", "sal", "sar", "sbb", "scas", "scasb", "scasd", "scasw", "seta", "setae", "setb", "setbe", "sete", "setg", "setge", "setl", "setle", "setna", "setnae", "setnb", "setnc", "setne", "setng", "setnge", "setnl", "setnle", "setno", "setnp", "setnz", "seto", "setp", "setpe", "setpo", "sets", "setz", "sfence", "sha1msg1", "sha1msg2", "sha1nexte", "sha1rnds4", "sha256msg1", "sha256rnds2", "shl", "shld", "shlx", "shr", "shrd", "shrx", "shufpd", "sidt", "sldt", "smsw", "sqrtpd", "sqrtps", "sqrtsd", "sqrtss", "stac", "std", "sti", "stmxcsr", "stos", "stosb", "stosd", "stosq", "stosw", "str", "subpd", "subps", "subsd", "subss", "swapgs", "syscall", "sysenter", "sysret", "test", "tpause", "tzcnt", "ucomisd", "ucomiss", "ud", "umwait", "unpckhpd", "unpckhps", "unpcklpd", "unpcklps", "valignd", "vblendmpd", "vblendmps", "vbroadcast", "vcompresspd", "vcompressps", "vcvtpd2udq", "vcvtpd2uqq", "vcvtph2ps", "vcvtps2ph", "vcvtps2qq", "vcvtps2uqq", "vcvtqq2pd", "vcvtqq2ps", "vcvtsd2usi", "vcvtss2usi", "vcvttpd2udq", "vcvttpd2uqq", "vcvttps2qq", "vcvttps2udq", "vcvttps2uqq", "vcvttss2usi", "vcvtudq2pd", "vcvtudq2ps", "vcvtuqq2pd", "vcvtuqq2ps", "vcvtusi2ss", "vdbpsadbw", "verr", "verw", "vexpandpd", "vexpandps", "vextractf32x4", "vextractf32x8", "vextractf64x2", "vextractf64x4", "vextracti32x4", "vextracti32x8", "vextracti64x2", "vextracti64x4", "vfixupimmps", "vfixupimmsd", "vfixupimmss", "vfmadd132pd", "vfmadd132ps", "vfmadd132ss", "vfmadd213pd", "vfmadd213ps", "vfmadd213sd", "vfmadd213ss", "vfmadd231ps", "vfmadd231sd", "vfmadd231ss", "vfmaddsub132pd", "vfmaddsub213pd", "vfmaddsub213ps", "vfmaddsub231pd", "vfmaddsub231ps", "vfmsub132ps", "vfmsub132sd", "vfmsub132ss", "vfmsub213pd", "vfmsub213ps", "vfmsub213ss", "vfmsub231pd", "vfmsub231ps", "vfmsub231sd", "vfmsub231ss", "vfmsubadd132ps", "vfmsubadd213pd", "vfmsubadd213ps", "vfmsubadd231pd", "vfnmadd132pd", "vfnmadd132ps", "vfnmadd132sd", "vfnmadd132ss", "vfnmadd213ps", "vfnmadd213sd", "vfnmadd213ss", "vfnmadd231pd", "vfnmadd231sd", "vfnmadd231ss", "vfnmsub132pd", "vfnmsub132ps", "vfnmsub132ss", "vfnmsub213pd", "vfnmsub213ps", "vfnmsub213sd", "vfnmsub231pd", "vfnmsub231ps", "vfnmsub231sd", "vfnmsub231ss", "vfpclassps", "vfpclasssd", "vfpclassss", "vgatherdpd", "vgatherdpd", "vgatherdps", "vgatherqpd", "vgatherqpd", "vgatherqps", "vgatherqps", "vgetexpps", "vgetexpsd", "vgetexpss", "vgetmantpd", "vgetmantps", "vgetmantss", "vinsertf128", "vinsertf32x4", "vinsertf32x8", "vinsertf64x4", "vinserti128", "vinserti32x4", "vinserti32x8", "vinserti64x4", "vmaskmov", "vmovdqa32", "vmovdqa64", "vmovdqu16", "vmovdqu64", "vmovdqu8", "vpblendd", "vpblendmb", "vpblendmd", "vpblendmq", "vpbroadcast", "vpbroadcastb", "vpbroadcastd", "vpbroadcastm", "vpbroadcastw", "vpcmpb", "vpcmpd", "vpcmpq", "vpcmpub", "vpcmpud", "vpcmpuw", "vpcmpw", "vpcompressd", "vpcompressq", "vpconflictd", "vperm2f128", "vperm2i128", "vpermb", "vpermd", "vpermi2b", "vpermi2d", "vpermi2ps", "vpermi2q", "vpermi2w", "vpermilpd", "vpermilps", "vpermpd", "vpermq", "vpermt2b", "vpermt2d", "vpermt2pd", "vpermt2ps", "vpermt2q", "vpermw", "vpexpandd", "vpexpandq", "vpgatherdd", "vpgatherdd", "vpgatherdq", "vpgatherqd", "vpgatherqd", "vpgatherqq", "vpgatherqq", "vplzcntq", "vpmadd52huq", "vpmadd52luq", "vpmaskmov", "vpmovb2m", "vpmovdb", "vpmovdw", "vpmovm2b", "vpmovm2d", "vpmovm2q", "vpmovm2w", "vpmovqb", "vpmovqd", "vpmovqw", "vpmovsdb", "vpmovsdw", "vpmovsqb", "vpmovsqw", "vpmovswb", "vpmovusdb", "vpmovusdw", "vpmovusqb", "vpmovusqd", "vpmovuswb", "vpmovw2m", "vpmovwb", "vpmultishiftqb", "vprold", "vprolq", "vprolvq", "vprord", "vprorq", "vprorvd", "vprorvq", "vpscatterdd", "vpscatterqd", "vpscatterqq", "vpsllvd", "vpsllvq", "vpsllvw", "vpsravd", "vpsravw", "vpsrlvd", "vpsrlvq", "vpsrlvw", "vpternlogd", "vpternlogq", "vptestmd", "vptestmq", "vptestmw", "vptestnmb", "vptestnmd", "vptestnmq", "vrangepd", "vrangeps", "vrangesd", "vrangess", "vrcp14pd", "vrcp14ps", "vrcp14ss", "vreducepd", "vreduceps", "vreducesd", "vreducess", "vrndscaleps", "vrndscalesd", "vrndscaless", "vrsqrt14pd", "vrsqrt14ps", "vrsqrt14ss", "vscalefpd", "vscalefps", "vscalefsd", "vscalefss", "vscatterdps", "vscatterqpd", "vscatterqps", "vshuff32x4", "vshuff64x2", "vshufi64x2", "vtestpd", "vtestps", "vzeroall", "vzeroupper", "wait", "wrfsbase", "wrgsbase", "wrmsr", "wrpkru", "xabort", "xacquire", "xadd", "xchg", "xend", "xgetbv", "xlat", "xlatb", "xor", "xorpd", "xorps", "xrstor", "xrstors", "xsave", "xsavec", "xsaveopt", "xsaves", "xsetbv", ] fasm_types = [ "db", "rb", "dw", "rw", "dd", "rd", "dp", "rp", "df", "rf", "dq", "rq", "dt", "rt", "du", ] # Add kind flag to identifier in id2kind def id_add_kind(identifier, kind): if identifier not in id2kind: id2kind[identifier] = '' id2kind[identifier] += kind # Remove kind flag of identifier in id2kind def id_remove_kind(identifier, kind): if identifier in id2kind: if kind in id2kind[identifier]: id2kind[identifier] = id2kind[identifier].replace(kind, '') # Get kind of an identifier def id_get_kind(identifier): if identifier in id2kind: return id2kind[identifier] else: return '' class LegacyAsmReader: def __init__(self, file): self.file = file self.lines = open(file, "r", encoding="utf-8").readlines() self.line_idx = 0 self.i = 0 def currline(self): return self.lines[self.line_idx] def curr(self): try: return self.lines[self.line_idx][self.i] except: return '' def step(self): c = self.curr() self.i += 1 # Wrap the line if '\\' followed by whitespaces and/or comment while self.curr() == '\\': i_of_backslash = self.i self.i += 1 while self.curr().isspace(): self.i += 1 if self.curr() == ';' or self.curr() == '': self.line_idx += 1 self.i = 0 else: # There's something other than a comment after the backslash # So don't interpret the backslash as a line wrap self.i = i_of_backslash break return c def nextline(self): c = self.curr() while c != '': c = self.step() self.line_idx += 1 self.i = 0 def no_lines(self): if self.line_idx >= len(self.lines): return True return False def location(self): return f"{self.file}:{self.line_idx + 1}" def skip_spaces(self): while self.curr().isspace(): self.step() class AsmReaderRecognizingStrings(LegacyAsmReader): def __init__(self, file): super().__init__(file) self.in_string = None self.should_recognize_strings = True def step(self): c = super().step() if self.should_recognize_strings and (c == '"' or c == "'"): # If just now we was at the double or single quotation mark # and we aren't in a string yet then say # "we are in a string openned with this quotation mark now" if self.in_string is None: self.in_string = c # If just now we was at the double or single quotation mark # and we are in the string entered with the same quotation mark # then say "we aren't in a string anymore" elif self.in_string == c: self.in_string = None return c class AsmReaderReadingComments(AsmReaderRecognizingStrings): def __init__(self, file): super().__init__(file) self.status = dict() self.status_reset() self.comment = '' def status_reset(self): # If the line has non-comment code self.status_has_code = False # If the line has a comment at the end self.status_has_comment = False # Let it recognize strings further, we are definitely out of a comment self.should_recognize_strings = True def status_set_has_comment(self): self.status_has_comment = True # Don't let it recognize strings cause we are in a comment now self.should_recognize_strings = False def status_set_has_code(self): self.status_has_code = True def update_status(self): # If we aren't in a comment and we aren't in a string - # say we are now in a comment if ';' met if (not self.status_has_comment and not self.in_string and self.curr() == ';'): self.status_set_has_comment() # Else if we are in a comment - collect the comment elif self.status_has_comment: self.comment += self.curr() # Else if there's some non-whitespace character out of a comment # then the line has code elif not self.status_has_comment and not self.curr().isspace(): self.status_set_has_code() def step(self): # Get to the next character c = super().step() # Update status of the line according to the next character self.update_status() return c def nextline(self): prev_line = self.currline() super().nextline() # If the line we leave was not a comment-only line # then forget the collected comment # Otherwise the collected comment should be complemented by # comment from next line in step() if self.status_has_code: # But we should preserve comment for the next line # If previous line set align (cause many functions re documented # right before align set, not before their labels) if not prev_line.startswith("align "): self.comment = '' # Reset the line status (now it's the status of the new line) self.status_reset() # Set new status for this line according to the # first character in the line self.update_status() class AsmReaderFetchingIdentifiers(AsmReaderReadingComments): def __init__(self, file): super().__init__(file) def fetch_identifier(self): self.skip_spaces() result = '' while is_id(self.curr()): result += self.step() return result class AsmReader(AsmReaderFetchingIdentifiers): def __init__(self, file): super().__init__(file) def append_file(full_path, contents): if debug_mode: if full_path not in output_files: output_files[full_path] = "" output_files[full_path] += contents else: f = open(full_path, "a") f.write(contents) f.close() class AsmElement: def __init__(self, location, name, comment): global warnings # If the element was constructed during this execution then # the element is new self.new = True self.location = location self.file = self.location.split(':')[0].replace('\\', '/') self.line = self.location.split(':')[1] self.name = name self.comment = comment if self.comment == '': warnings += f'{self.location}: Undocumented element\n' def dump(self): print(f"\n{self.location}: {self.name}") print(f"{self.comment}") def emit(self, dest, doxycomment='', declaration=''): # Do not emit anything if the symbol is marked as hidden in its comment if '@dont_give_a_doxygen' in self.comment: return global warnings # Redefine default declaration if declaration == '': declaration = f'#define {self.name}' # Check doxycomment if not doxycomment.endswith('\n'): doxycomment += '\n' if doxycomment.split('@brief ')[1][0].islower(): warnings += (f"{self.location}: Brief comment starting from " + "lowercase\n") # Build contents to emit contents = '' contents += '/**\n' contents += doxycomment contents += (f"@par Source\n" + f"{self.file}:{self.line}\n") contents += '*/\n' contents += declaration contents += '\n\n' # Get path to file to emit this full_path = dest + '/' + self.file # Remove the file on first access if it was # created by previous generation if full_path not in created_files: if os.path.isfile(full_path): os.remove(full_path) created_files.append(full_path) # Create directories need for the file os.makedirs(os.path.dirname(full_path), exist_ok=True) contents = ''.join([i if ord(i) < 128 else '?' for i in contents]) append_file(full_path, contents) class AsmVariable(AsmElement): def __init__(self, location, name, comment, type, init): super().__init__(location, name, comment) self.type = type self.init = init def dump(self): super().dump() print(f"(Variable)\n---") def emit(self, dest): # Build doxycomment specific for the variable doxycomment = '' doxycomment += self.comment if '@brief' not in doxycomment: doxycomment = '@brief ' + doxycomment doxycomment += (f"@par Initial value\n" + f"{self.init}\n") # Build the declaration name = self.name.replace(".", "_") var_type = self.type.replace(".", "_") declaration = f"{var_type} {name};" # Emit this super().emit(dest, doxycomment, declaration) class AsmFunction(AsmElement): def __init__(self, location, name, comment, calling_convention, args, used_regs): super().__init__(location, name, comment) self.calling_convention = calling_convention self.args = args self.used_regs = used_regs def dump(self): super().dump() print(f"(Function)\n---") def emit(self, dest): # Build doxycomment specific for the variable doxycomment = '' doxycomment += self.comment if '@brief' not in doxycomment: doxycomment = '@brief ' + doxycomment # If there was no arguments, maybe that's just a label # then parse parameters from its comment if len(self.args) == 0 and '@param' in self.comment: i = 0 while '@param' in self.comment[i:]: i = self.comment.index('@param', i) # Skip '@param' i += len('@param') # Skip spaces after '@param' while self.comment[i].isspace(): i += 1 # Get the parameter name name = '' while is_id(self.comment[i]): name += self.comment[i] i += 1 # Save the parameter self.args.append((name, 'arg_t')) # Build the arg list for declaration arg_list = '(' if len(self.args) > 0: argc = 0 for arg in self.args: if argc != 0: arg_list += ", " arg_list += f"{arg[1]} {arg[0]}" argc += 1 arg_list += ')' # Build the declaration name = self.name.replace(".", "_") declaration = f"void {name}{arg_list};" # Emit this super().emit(dest, doxycomment, declaration) class AsmLabel(AsmElement): def __init__(self, location, name, comment): super().__init__(location, name, comment) def dump(self): super().dump() print(f"(Label)\n---") def emit(self, dest): # Build doxycomment specific for the variable doxycomment = '' doxycomment += self.comment if '@brief' not in doxycomment: doxycomment = '@brief ' + doxycomment # Build the declaration name = self.name.replace(".", "_") declaration = f"label {name};" # Emit this super().emit(dest, doxycomment, declaration) class AsmMacro(AsmElement): def __init__(self, location, name, comment, args): super().__init__(location, name, comment) self.args = args def dump(self): super().dump() print(f"(Macro)\n---") def emit(self, dest): # Construct arg list without '['s, ']'s and '*'s args = [arg for arg in self.args if arg not in "[]*"] # Construct C-like arg list arg_list = "" if len(args) > 0: arg_list += '(' argc = 0 for arg in args: if argc != 0: arg_list += ", " arg_list += arg argc += 1 arg_list += ')' # Build doxycomment doxycomment = '' doxycomment += self.comment if '@brief' not in doxycomment: doxycomment = '@brief ' + doxycomment # Build declaration declaration = f"#define {self.name}{arg_list}" # Emit this super().emit(dest, doxycomment, declaration) class AsmStruct(AsmElement): def __init__(self, location, name, comment, members): super().__init__(location, name, comment) self.members = members def dump(self): super().dump() print(f"(Struct)\n---") def emit(self, dest): # Build doxycomment doxycomment = '' doxycomment += self.comment if '@brief' not in doxycomment: doxycomment = '@brief ' + doxycomment doxycomment += '\n' # Build declaration declaration = f"struct {self.name}" + " {\n" for member in self.members: if type(member) == AsmVariable: declaration += (f'\t{member.type} {member.name}; ' + f'/**< {member.comment} */\n') declaration += '};' # Emit this super().emit(dest, doxycomment, declaration) class AsmUnion(AsmElement): def __init__(self, location, name, comment, members): super().__init__(location, name, comment) self.members = members def dump(self): super().dump() print(f"(Union)\n---") def emit(self, dest): # Build doxycomment doxycomment = '' doxycomment += self.comment if '@brief' not in doxycomment: doxycomment = '@brief ' + doxycomment # Build declaration declaration = f"union {self.name}" + " {};" # Emit this super().emit(dest, doxycomment, declaration) class VariableNameIsMacroName: def __init__(self, name): self.name = name def is_id(c): return c.isprintable() and c not in "+-/*=<>()[]{};:,|&~#`'\" \n\r\t\v" def is_starts_as_id(s): return not s[0].isdigit() def parse_after_macro(r): location = r.location() # Skip spaces after the "macro" keyword r.skip_spaces() # Read macro name name = "" while is_id(r.curr()) or r.curr() == '#': name += r.step() # Skip spaces after macro name r.skip_spaces() # Find all arguments args = [] arg = '' while r.curr() and r.curr() != ';' and r.curr() != '{': # Collect identifier if is_id(r.curr()): arg += r.step() # Save the collected identifier elif r.curr() == ',': args.append(arg) arg = '' r.step() # Just push the '[' elif r.curr() == '[': args.append(r.step()) # Just push the identifier and get ']' ready to be pushed on next comma elif r.curr() == ']': args.append(arg) arg = r.step() # Just push the identifier and get '*' ready to be pushed on next comma elif r.curr() == '*': args.append(arg) arg = r.step() # Just skip whitespaces elif r.curr().isspace(): r.step() # Something unexpected else: raise Exception(f"Unexpected symbol '{r.curr()}' " + f"at index #{r.i} in the macro declaration " + f"at {location} " + f"(line: {r.lines[r.line_idx]})\n''") # Append the last argument if arg != '': args.append(arg) # Skip t spaces after the argument list r.skip_spaces() # Get a comment if it is: read till the end of the line and # get the comment from the reader while r.curr() != '': r.step() comment = r.comment # Find end of the macro prev = '' while True: if r.curr() == '}' and prev != '\\': break elif r.curr() == '': prev = '' r.nextline() continue prev = r.step() # Build the output return AsmMacro(location, name, comment, args) def parse_variable(r, first_word=None): global warnings location = r.location() # Skip spaces before variable name r.skip_spaces() # Get variable name name = "" # Read it if it was not supplied if first_word is None: while is_id(r.curr()): name += r.step() # Or use the supplied one instead else: name = first_word # Check the name # If it's 0 len, that means threr's something else than an # identifier at the beginning if len(name) == 0: return None # If it starts from digit or othervice illegally it's illegal if not is_starts_as_id(name): return None # Get kind of the identifier from id2kind table kind = id_get_kind(name) # If it's a keyword, that's not a variable declaration if ID_KIND_KEYWORD in kind: return None # If it's a macro name, that's not a variable declaration if ID_KIND_MACRO_NAME in kind: return VariableNameIsMacroName(name) # If it's a datatype or a structure name that's not a # variable declaration: that's just a data # don't document just a data for now if ID_KIND_STRUCT_NAME in kind or ID_KIND_FASM_TYPE in kind: return None # Skip spaces before type name r.skip_spaces() # Read type name var_type = "" while is_id(r.curr()): var_type += r.step() # Check the type name if len(var_type) == 0: # If there's no type identifier after the name # maybe the name is something meaningful for the next parser # return it return name # If it starts from digit or othervice illegally it's illegal if not is_starts_as_id(var_type): return None # Get kind of type identifier type_kind = id_get_kind(var_type) # If it's a keyword, that's not a variable declaration # return the two words of the lexical structure if ID_KIND_KEYWORD in type_kind: return (name, var_type) # Skip spaces before the value r.skip_spaces() # Read the value until the comment or end of the line value = "" while r.curr() != ';' and r.curr() != '' and r.curr() != '\n': value += r.step() # Skip spaces after the value r.skip_spaces() # Read till end of the line to get a comment from the reader while r.curr() != '': r.step() # Build the result return AsmVariable(location, name, r.comment, var_type, value) def parse_after_struct(r, as_union=True): global warnings location = r.location() # Skip spaces after "struct" keyword r.skip_spaces() # Read struct name name = "" while is_id(r.curr()): name += r.step() # Read till end of the line and get the comment from the reader while r.curr() != '': r.step() comment = r.comment # Get to the next line to parse struct members r.nextline() # Parse struct members members = [] while True: r.skip_spaces() var = parse_variable(r) if type(var) == AsmVariable: members.append(var) elif type(var) == str: if var == 'union': # Parse the union as a struct union = parse_after_struct(r, as_union=True) members.append(union) # Skip the ends of the union r.nextline() elif r.curr() == ':': warnings += f"{r.location()}: Skept the label in the struct\n" else: raise Exception(f"Garbage in struct member at {location} " + f" (got '{var}' identifier)") elif type(var) == VariableNameIsMacroName: if var.name == 'ends': break r.nextline() # Return the result if as_union: return AsmStruct(location, name, comment, members) else: return AsmUnion(location, name, comment, members) def parse_after_proc(r): # Get proc name name = r.fetch_identifier() # Next identifier after the proc name identifier = r.fetch_identifier() # Check if the id is 'stdcall' or 'c' (calling convention specifier) # and if so - save the convention and lookup the next identifier calling_convention = '' if identifier == 'stdcall' or identifier == 'c': calling_convention = identifier # If next is a comma, just skip it if r.curr() == ',': r.step() # Read the next identifier identifier = r.fetch_identifier() # Check if the id is 'uses' (used register list specifier) # and if so save the used register list used_regs = [] if identifier == 'uses': # Read the registers while True: reg_name = r.fetch_identifier() if reg_name != '': used_regs.append(reg_name) else: break # If next is a comma, just skip it if r.curr() == ',': r.step() # Read the next identifier identifier = r.fetch_identifier() # Check if there are argument identifiers args = [] while identifier != '': arg_name = identifier arg_type = 'arg_t' # Skip spaces after argument name r.skip_spaces() # If there's a ':' after the name - the next identifier is type if r.curr() == ':': r.step() arg_type = r.fetch_identifier() # If there's a comma - there's one more argument # else no arguments anymore if r.curr() == ',': r.step() identifier = r.fetch_identifier() else: identifier = '' args.append((arg_name, arg_type)) # Get to the end of the line and get a comment from the reader while r.curr() != '': r.step() comment = r.comment # Build the element return AsmFunction(r.location(), name, comment, calling_convention, args, used_regs) def get_declarations(asm_file_contents, asm_file_name): r = AsmReader(asm_file_name) while not r.no_lines(): # Skip leading spaces r.skip_spaces() # Skip the line if it's starting with a comment if r.curr() == ';': r.nextline() continue # Get first word first_word = "" while is_id(r.curr()): first_word += r.step() # Match macro declaration if first_word == "macro": macro = parse_after_macro(r) elements.append(macro) id_add_kind(macro.name, ID_KIND_MACRO_NAME) # Match structure declaration elif first_word == "struct": struct = parse_after_struct(r) elements.append(struct) id_add_kind(struct.name, ID_KIND_STRUCT_NAME) # Match function definition elif first_word == "proc": proc = parse_after_proc(r) elements.append(proc) elif first_word == 'format': # Skip the format directive pass elif first_word == 'include': # Skip the include directive pass elif first_word == 'if': # Skip the conditional directive pass elif first_word == 'repeat': # Skip the repeat directive pass elif first_word == 'purge': while True: # Skip spaces after the 'purge' keyword or after # the comma what separated the previous macro name r.skip_spaces() # Get the purged macro name name = '' while is_id(r.curr()): name += r.step() # Remove the purged macro from the macro names list try: id_remove_kind(name, ID_KIND_MACRO_NAME) except: pass # Skip spaces after the name r.skip_spaces() # If it's comma (',') after then that's not the last purged # macro, continue purging if r.curr() == ',': r.step() continue # Here we purged all the macros should be purged break # Match label or a variable elif len(first_word) != 0: # Skip spaces after the identifier r.skip_spaces() # Match a variable var = parse_variable(r, first_word) if type(var) == AsmVariable: elements.append(var) # If it wasn't a variable but there was an identifier # Maybe that's a label and the identifier is the label name # The parse_variable returns the first found or supplied identifier # In this case it returns the first_word which is supplied # If it didn't match a type identifier after the word elif type(var) == str: name = var # Match label beginning (':' after name) if r.curr() == ':': # Get to the end of the line and # get the coment from the reader while r.curr() != '': r.step() comment = r.comment # Only handle non-local labels if name[0] != '.' and name != "@@": # Treat the label as function if there's @return or # @param in its comment. Otherwise it's just a variable # with type `label` in generated doxygen C if '@return' in comment or '@param' in comment: element = AsmFunction(r.location(), name, comment, '', [], []) else: element = AsmLabel(r.location(), name, comment) elements.append(element) elif r.curr() == '=': # Save the identifier as a set constant id_add_kind(first_word, ID_KIND_SET_CONSTANT) elif type(var) == tuple: (word_one, word_two) = var if word_two == 'equ': # Save the identifier as an equated constant id_add_kind(word_one, ID_KIND_EQUATED_CONSTANT) r.nextline() def it_neds_to_be_parsed(source_file): # If there's no symbols file saved - parse it anyway # cause we need to create the symbols file and use it # if we gonna generate proper doxygen if not os.path.isfile('asmxygen.elements.pickle'): return True dest = doxygen_src_path + '/' + source_file # If there's no the doxygen file it should be compiled to # then yes, we should compile it to doxygen if not os.path.isfile(dest): return True source_change_time = os.path.getmtime(source_file) dest_change_file = os.path.getmtime(dest) # If the source is newer than the doxygen it was compiled to # then the source should be recompiled (existing doxygen is old) if source_change_time > dest_change_file: return True return False def handle_file(handled_files, asm_file_name, subdir="."): global elements # Canonicalize the file path and get it relative to cwd cwd = os.path.abspath(os.path.dirname(sys.argv[0])) asm_file_name = os.path.realpath(asm_file_name) asm_file_name = asm_file_name[len(cwd) + 1:] # If it's lang.inc - skip it if asm_file_name == 'lang.inc': return # If the file was handled in this execution before - skip it if asm_file_name in handled_files: return # Say that the file was handled in this execution handled_files.append(asm_file_name) # Check if the file should be parsed # (if it was modified or wasn't parsed yet) should_get_declarations = True if not it_neds_to_be_parsed(asm_file_name): print(f"Skipping {asm_file_name} (already newest)") should_get_declarations = False else: print(f"Handling {asm_file_name}") # Remove elements parsed from this file before if any elements_to_remove = [ x for x in elements if x.location.split(':')[0] == asm_file_name ] elements = [ x for x in elements if x.location.split(':')[0] != asm_file_name ] # Forget types of identifiers of names of the removed elements for element in elements_to_remove: if type(element) == AsmStruct: id_remove_kind(element.name, ID_KIND_STRUCT_NAME) elif type(element) == AsmMacro: id_remove_kind(element.name, ID_KIND_MACRO_NAME) # Read the source asm_file_contents = open(asm_file_name, "r", encoding="utf-8").read() # Find includes, fix their paths and handle em recoursively includes = re.findall(r'^include (["\'])(.*)\1', asm_file_contents, flags=re.MULTILINE) for include in includes: include = include[1].replace('\\', '/') full_path = subdir + '/' + include # If the path isn't valid, maybe that's not relative path if not os.path.isfile(full_path): full_path = include new_subdir = full_path.rsplit('/', 1)[0] handle_file(handled_files, full_path, new_subdir) # Only collect declarations from the file if it wasn't parsed before if should_get_declarations and not clean_generated_stuff: get_declarations(asm_file_contents, asm_file_name) if __name__ == "__main__": link_root = "http://websvn.kolibrios.org/filedetails.php" link_root += "?repname=Kolibri+OS&path=/kernel/trunk" # Dict where an identifier is assicoated with a string # The string contains characters specifying flags # Available flags: # k - Keyword # m - Macro name # t - fasm data Type name (db, rq, etc.) # s - Struct type name # e - equated constant (name equ value) # = - set constants (name = value) ID_KIND_KEYWORD = 'k' ID_KIND_MACRO_NAME = 'm' ID_KIND_FASM_TYPE = 't' ID_KIND_STRUCT_NAME = 's' ID_KIND_EQUATED_CONSTANT = 'e' ID_KIND_SET_CONSTANT = '=' id2kind = {} for keyword in keywords: id_add_kind(keyword, ID_KIND_KEYWORD) for fasm_type in fasm_types: id_add_kind(fasm_type, ID_KIND_FASM_TYPE) # Warning list warnings = "" # Parameters # Path to doxygen folder to make doxygen files in: -o doxygen_src_path = 'docs/doxygen' # Remove generated doxygen files: --clean clean_generated_stuff = False # Dump all defined symbols: --dump dump_symbols = False # Print symbol stats: --stats print_stats = False # Do not write warnings file: --nowarn enable_warnings = True # Parse arguments parser = argparse.ArgumentParser() parser.add_argument("-o", help="Doxygen output folder") parser.add_argument("--clean", help="Remove generated files", action="store_true") parser.add_argument("--dump", help="Dump all defined symbols", action="store_true") parser.add_argument("--stats", help="Print symbol stats", action="store_true") parser.add_argument("--nowarn", help="Do not write warnings file", action="store_true") parser.add_argument("--noemit", help="Do not emit doxygen files (for testing)", action="store_true") parser.add_argument("--debug", help="Show hashes of files (for testing)", action="store_true") args = parser.parse_args() doxygen_src_path = args.o if args.o else 'docs/doxygen' clean_generated_stuff = args.clean dump_symbols = args.dump print_stats = args.stats enable_warnings = not args.nowarn noemit = args.noemit debug_mode = args.debug # Variables, functions, labels, macros, structure types elements = [] created_files = [] kernel_files = [] output_files = {} # If --debug then all the files are written here # Load remembered list of symbols if os.path.isfile('asmxygen.elements.pickle'): print('Reading existing dump of symbols') pickle_file = open('asmxygen.elements.pickle', 'rb') (elements, id2kind) = pickle.load(pickle_file) pickle_file.close() handle_file(kernel_files, "./kernel.asm") if dump_symbols: stdout = sys.stdout sys.stdout = open('asmxygen.dump.txt', 'w', encoding='utf-8') for asm_element in elements: asm_element.dump() sys.stdout = stdout if clean_generated_stuff: kernel_files_set = set(kernel_files) for file in kernel_files: doxygen_file = f"{doxygen_src_path}/{file}" if (os.path.isfile(doxygen_file)): print(f"Removing {file}... ", end='') os.remove(doxygen_file) print("Done.") elif not noemit: print(f"Writing doumented sources to {doxygen_src_path}") i = 0 new_elements = [x for x in elements if x.new] for element in new_elements: counter = f"[{i + 1}/{len(new_elements)}]" print(f"{counter} Emitting {element.name} from {element.location}") element.emit(doxygen_src_path) i += 1 print(f"Writing dump of symbols to asmxygen.elements.pickle") # Now when the new elements already was written, there's no new # elements anymore for element in elements: element.new = False pickle_file = open('asmxygen.elements.pickle', 'wb') pickle.dump((elements, id2kind), pickle_file) pickle_file.close() if print_stats: var_count = 0 mac_count = 0 lab_count = 0 fun_count = 0 uni_count = 0 str_count = 0 for element in elements: if type(element) == AsmVariable: var_count += 1 elif type(element) == AsmMacro: mac_count += 1 elif type(element) == AsmLabel: lab_count += 1 elif type(element) == AsmFunction: fun_count += 1 elif type(element) == AsmUnion: uni_count += 1 elif type(element) == AsmStruct: str_count += 1 print(f'Parsed variable count: {var_count}') print(f'Parsed macro count: {mac_count}') print(f'Parsed label count: {lab_count}') print(f'Parsed function count: {fun_count}') print(f'Parsed union type count: {uni_count}') print(f'Parsed structure type count: {str_count}') if enable_warnings: open('asmxygen.txt', "w", encoding="utf-8").write(warnings) if debug_mode: hash_per_file = "" for file in output_files: h = hashlib.sha1(bytes(output_files[file], "ascii")).hexdigest() hash_per_file += f"{file}: {h}\n" if not os.path.exists("asmxygen_hash_per_file.txt"): open("asmxygen_hash_per_file.txt", "w").write(hash_per_file) print("NEW") else: reference_hash_per_file = open("asmxygen_hash_per_file.txt").read() if reference_hash_per_file != hash_per_file: diffs = difflib.ndiff(reference_hash_per_file, hash_per_file) print(''.join(diffs)) else: print("SUCCESS")