mirror of
https://github.com/isledecomp/isle-portable.git
synced 2025-09-27 22:54:02 -04:00
roadmap: Suggest order of modules (#507)
* roadmap: Suggest order of modules * Include more modules in the list * Sort by avg address with outliers removed * Mark order-adjusted modules. Show library order. * Use bisect for performance * Use average address for library order * Bugfix for get_module
This commit is contained in:
parent
9e0b19856c
commit
a1488b16b4
@ -5,7 +5,9 @@ in the original binary."""
|
|||||||
import os
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Optional
|
import statistics
|
||||||
|
import bisect
|
||||||
|
from typing import Iterator, List, Optional, Tuple
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from isledecomp import Bin as IsleBin
|
from isledecomp import Bin as IsleBin
|
||||||
from isledecomp.cvdump import Cvdump
|
from isledecomp.cvdump import Cvdump
|
||||||
@ -28,6 +30,7 @@ class ModuleMap:
|
|||||||
def __init__(self, pdb, binfile) -> None:
|
def __init__(self, pdb, binfile) -> None:
|
||||||
cvdump = Cvdump(pdb).section_contributions().modules().run()
|
cvdump = Cvdump(pdb).section_contributions().modules().run()
|
||||||
self.module_lookup = {m.id: (m.lib, m.obj) for m in cvdump.modules}
|
self.module_lookup = {m.id: (m.lib, m.obj) for m in cvdump.modules}
|
||||||
|
self.library_lookup = {m.obj: m.lib for m in cvdump.modules}
|
||||||
self.section_contrib = [
|
self.section_contrib = [
|
||||||
(
|
(
|
||||||
binfile.get_abs_addr(sizeref.section, sizeref.offset),
|
binfile.get_abs_addr(sizeref.section, sizeref.offset),
|
||||||
@ -38,8 +41,34 @@ class ModuleMap:
|
|||||||
if binfile.is_valid_section(sizeref.section)
|
if binfile.is_valid_section(sizeref.section)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# For bisect performance enhancement
|
||||||
|
self.contrib_starts = [start for (start, _, __) in self.section_contrib]
|
||||||
|
|
||||||
|
def get_lib_for_module(self, module: str) -> Optional[str]:
    """Return the library recorded for the given .obj module name.

    The lookup table is keyed by the module's obj name. Returns None when
    the module name is not present in the table."""
    try:
        return self.library_lookup[module]
    except KeyError:
        return None
|
||||||
|
|
||||||
|
def get_all_cmake_modules(self) -> List[str]:
    """Return the obj name of every known module whose obj name begins
    with "CMakeFiles", in the iteration order of the module lookup table."""
    cmake_objs = []
    for _lib, obj_name in self.module_lookup.values():
        if obj_name.startswith("CMakeFiles"):
            cmake_objs.append(obj_name)

    return cmake_objs
|
||||||
|
|
||||||
def get_module(self, addr: int) -> Optional[str]:
    """Look up the module whose section contribution contains `addr`.

    Binary-searches `self.contrib_starts` for the one contribution that
    could hold the address, range-checks it, and maps its module id through
    `self.module_lookup`. If no contribution contains the address (or its
    module id is unknown), control falls through past the lookup."""
    # NOTE(review): bisect requires contrib_starts (built from
    # section_contrib) to be sorted by start address -- confirm where the
    # list is constructed.
    i = bisect.bisect_left(self.contrib_starts, addr)

    # bisect gives the insertion point, but we want the contribution that
    # contains the address. If addr is exactly a contribution start we are
    # already in the right spot; otherwise the candidate is the entry just
    # before the insertion point. The bounds check must come first: when
    # addr is past every start, the insertion point equals the list length
    # and indexing it would raise IndexError.
    if i == len(self.contrib_starts) or self.contrib_starts[i] != addr:
        i -= 1

    # i is -1 when addr is below the first contribution or the list is
    # empty; in that case there is nothing to check.
    if i >= 0:
        (start, size, module_id) = self.section_contrib[i]
        if start <= addr < start + size:
            if (module := self.module_lookup.get(module_id)) is not None:
                return module
|
||||||
@ -58,6 +87,9 @@ def print_sections(sections):
|
|||||||
print()
|
print()
|
||||||
|
|
||||||
|
|
||||||
|
# Three-letter symbol type abbreviations that DeltaCollector accepts as its
# match_type. Any other value makes it fall back to "fun".
ALLOWED_TYPE_ABBREVIATIONS = ["fun", "dat", "poi", "str", "vta"]
|
||||||
|
|
||||||
|
|
||||||
def match_type_abbreviation(mtype: Optional[SymbolType]) -> str:
|
def match_type_abbreviation(mtype: Optional[SymbolType]) -> str:
|
||||||
"""Return abbreviation of the given SymbolType name"""
|
"""Return abbreviation of the given SymbolType name"""
|
||||||
if mtype is None:
|
if mtype is None:
|
||||||
@ -66,6 +98,42 @@ def match_type_abbreviation(mtype: Optional[SymbolType]) -> str:
|
|||||||
return mtype.name.lower()[:3]
|
return mtype.name.lower()[:3]
|
||||||
|
|
||||||
|
|
||||||
|
def get_cmakefiles_prefix(module: str) -> str:
    """Derive the "CMakeFiles/something.dir/" prefix for a given .obj path.

    This prefix stands in for the library name of the module. A module that
    does not begin with "CMakeFiles" is returned unchanged."""
    if not module.startswith("CMakeFiles"):
        return module

    # Keep only the first two '/'-separated tokens and restore the
    # trailing separator.
    leading_tokens = module.split("/", 2)[:2]
    return "/".join(leading_tokens) + "/"
|
||||||
|
|
||||||
|
|
||||||
|
def truncate_module_name(prefix: str, module: str) -> str:
    """Strip the CMakeFiles prefix and the .obj suffix from a module name.
    Input: CMakeFiles/lego1.dir/, CMakeFiles/lego1.dir/LEGO1/define.cpp.obj
    Output: LEGO1/define.cpp

    Either part is left alone when the module does not actually carry it."""
    obj_suffix = ".obj"

    # Skip past the prefix only when the module really starts with it.
    offset = len(prefix) if module.startswith(prefix) else 0
    trimmed = module[offset:]

    if trimmed.endswith(obj_suffix):
        trimmed = trimmed[: -len(obj_suffix)]

    return trimmed
|
||||||
|
|
||||||
|
|
||||||
|
def avg_remove_outliers(entries: List[int]) -> int:
    """Average a list of entries (addresses) while ignoring outliers.

    An entry counts as an outlier when it lies more than two population
    standard deviations away from the mean of the full list. A list with a
    single entry is returned as-is."""
    if len(entries) == 1:
        return entries[0]

    center = statistics.mean(entries)
    cutoff = 2 * statistics.pstdev(entries)

    kept = []
    for value in entries:
        if abs(value - center) <= cutoff:
            kept.append(value)

    return int(statistics.mean(kept))
|
||||||
|
|
||||||
|
|
||||||
RoadmapRow = namedtuple(
|
RoadmapRow = namedtuple(
|
||||||
"RoadmapRow",
|
"RoadmapRow",
|
||||||
[
|
[
|
||||||
@ -82,6 +150,144 @@ RoadmapRow = namedtuple(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DeltaCollector:
    """Reads each row of the results and aggregates information about the
    placement of each module.

    Only rows whose sym_type equals the selected match_type and that have a
    module are counted. Per module, it records every original address, the
    earliest original address, and every displacement."""

    def __init__(self, match_type: str = "fun") -> None:
        """Create an empty collector for the given symbol type.

        match_type is normalized (stringified, stripped, lowercased, cut to
        three characters). If the result is not one of
        ALLOWED_TYPE_ABBREVIATIONS, the collector falls back to "fun"."""
        # The displacement for each symbol from each module
        self.disp_map = {}

        # Each address for each module
        self.addresses = {}

        # The earliest address for each module
        self.earliest = {}

        # String abbreviation for which symbol type we are checking
        self.match_type = "fun"

        match_type = str(match_type).strip().lower()[:3]
        if match_type in ALLOWED_TYPE_ABBREVIATIONS:
            self.match_type = match_type

    def read_row(self, row: RoadmapRow) -> None:
        """Fold one result row into the per-module aggregates.

        Rows without a module, or of a different symbol type than the one
        being collected, are ignored."""
        if row.module is None or row.sym_type != self.match_type:
            return

        if row.orig_addr is not None:
            self.addresses.setdefault(row.module, []).append(row.orig_addr)

            # Track the minimum directly instead of comparing against a
            # magic "larger than any address" sentinel.
            known_earliest = self.earliest.get(row.module)
            if known_earliest is None or row.orig_addr < known_earliest:
                self.earliest[row.module] = row.orig_addr

        if row.displacement is not None:
            self.disp_map.setdefault(row.module, []).append(row.displacement)

    def iter_sorted(self) -> Iterator[Tuple[int, str]]:
        """Compute the average address for each module, then generate
        (average address, module) pairs in ascending order of that average.

        The average is taken with outliers removed. Modules with equal
        averages keep the order in which they were first seen."""
        avg_address = {
            mod: avg_remove_outliers(values) for mod, values in self.addresses.items()
        }
        for mod, avg in sorted(avg_address.items(), key=lambda x: x[1]):
            yield (avg, mod)
|
||||||
|
|
||||||
|
|
||||||
|
def suggest_order(results: List[RoadmapRow], module_map: ModuleMap, match_type: str):
    """Suggest the order of modules for CMakeLists.txt

    Feeds every row of `results` into a DeltaCollector restricted to
    `match_type`, then prints two reports to stdout:

    1. For each "CMakeFiles/<name>.dir/" prefix, the modules under it in
       ascending order of their average address (outliers removed). Each
       line shows the module's earliest address, a "*" when that earliest
       address is lower than the one on the previous line, the average
       displacement ("n/a" when no row for the module carried one), and the
       source file path. CMake modules that contributed no symbols of the
       selected type are listed afterwards as "no suggestion".
    2. Every library, ordered by the lowest average address among its
       modules.
    """

    dc = DeltaCollector(match_type)
    for row in results:
        dc.read_row(row)

    # First, show the order of .obj files for the "CMake Modules"
    # Meaning: the modules where the .obj file begins with "CMakeFiles".
    # These are the libraries where we directly control the order.
    # The library name (from cvdump) doesn't make it obvious that these are
    # our libraries so we derive the name based on the CMakeFiles prefix.
    leftover_modules = set(module_map.get_all_cmake_modules())

    # A little convoluted, but we want to take the first two tokens
    # of the string with '/' as the delimiter.
    # i.e. CMakeFiles/isle.dir/
    # The idea is to print exactly what appears in CMakeLists.txt.
    cmake_prefixes = sorted({get_cmakefiles_prefix(mod) for mod in leftover_modules})

    # Save this off because we'll use it again later.
    computed_order = list(dc.iter_sorted())

    # Width of everything printed before the file name on a module line:
    # "0x" + 8 hex digits + order mark + space + 10-wide displacement.
    # Used to line up the "no suggestion" rows with the module rows.
    lead_width = 2 + 8 + 1 + 1 + 10

    for prefix in cmake_prefixes:
        print(prefix)

        last_earliest = 0
        # Show modules ordered by the computed average of addresses
        for _, module in computed_order:
            if not module.startswith(prefix):
                continue

            # discard() instead of remove(): a module that has symbols but
            # is missing from the CMake module list must not abort the report.
            leftover_modules.discard(module)

            # A module can have addresses without any displacement values.
            # Formatting None with a width spec raises TypeError, so fall
            # back to a placeholder string.
            displacements = dc.disp_map.get(module)
            if displacements:
                disp_text = str(int(statistics.mean(displacements)))
            else:
                disp_text = "n/a"

            # Call attention to any modules where ordering by earliest
            # address is different from the computed order we display.
            earliest = dc.earliest.get(module)
            ooo_mark = "*" if earliest < last_earliest else " "
            last_earliest = earliest

            code_file = truncate_module_name(prefix, module)
            print(f"0x{earliest:08x}{ooo_mark} {disp_text:>10} {code_file}")

        # These modules are included in the final binary (in some form) but
        # don't contribute any symbols of the type we are checking.
        # n.b. There could still be other modules that are part of
        # CMakeLists.txt but are not included in the pdb for whatever reason.
        # In other words: don't take the list we provide as the final word on
        # what should or should not be included.
        # This is merely a suggestion of the order.
        # Sorted so that the output is the same from run to run.
        for module in sorted(leftover_modules):
            if not module.startswith(prefix):
                continue

            # aligned with previous print
            code_file = truncate_module_name(prefix, module)
            print(f"{'no suggestion':>{lead_width}} {code_file}")

        print()

    # Now display the order of all libraries in the final file.
    library_order = {}

    # computed_order is ascending by average address, so the first module
    # seen for a library carries that library's lowest average address.
    for start, module in computed_order:
        lib = module_map.get_lib_for_module(module)
        if lib is None:
            lib = get_cmakefiles_prefix(module)

        library_order.setdefault(lib, start)

    print("Library order (average address shown):")
    # Insertion order is already ascending by address (see above).
    for lib, start in library_order.items():
        # Strip off any OS path for brevity
        if not lib.startswith("CMakeFiles"):
            lib = os.path.basename(lib)

        print(f"{lib:40} {start:08x}")
|
||||||
|
|
||||||
|
|
||||||
def print_text_report(results: List[RoadmapRow]):
|
def print_text_report(results: List[RoadmapRow]):
|
||||||
"""Print the result with original and recomp addresses."""
|
"""Print the result with original and recomp addresses."""
|
||||||
for row in results:
|
for row in results:
|
||||||
@ -150,6 +356,13 @@ def parse_args() -> argparse.Namespace:
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--verbose", "-v", action="store_true", help="Show recomp addresses in output"
|
"--verbose", "-v", action="store_true", help="Show recomp addresses in output"
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--order",
|
||||||
|
const="fun",
|
||||||
|
nargs="?",
|
||||||
|
type=str,
|
||||||
|
help="Show suggested order of modules (using the specified symbol type)",
|
||||||
|
)
|
||||||
|
|
||||||
(args, _) = parser.parse_known_args()
|
(args, _) = parser.parse_known_args()
|
||||||
|
|
||||||
@ -245,6 +458,10 @@ def main():
|
|||||||
|
|
||||||
results = list(map(to_roadmap_row, engine.get_all()))
|
results = list(map(to_roadmap_row, engine.get_all()))
|
||||||
|
|
||||||
|
if args.order is not None:
|
||||||
|
suggest_order(results, module_map, args.order)
|
||||||
|
return
|
||||||
|
|
||||||
if args.csv is None:
|
if args.csv is None:
|
||||||
if args.verbose:
|
if args.verbose:
|
||||||
print("ORIG sections:")
|
print("ORIG sections:")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user