Eliminating Magic Numbers in Binary Ninja
Table of Contents
While working on Part 2: Reversing the File Encryption, I ran into an avoidable and time consuming issue: several hardcoded constants in the target binary had no associated enums.
Binary Ninja supports enums, but it does not automatically convert #define constants from imported headers into enums. That’s a problem when reversing Windows binaries, because a significant portion of meaningful symbolic constants in the Windows SDK are defined as preprocessor macros rather than enum members. As a result, many hardcoded constants in the disassembly remain raw integers instead of resolving to descriptive names.
In this post, we’ll fix that by extracting #define constants from the Windows headers and transforming them into enums that Binary Ninja can consume.
Automating the Extraction
Rather than writing a parser myself, I delegated the task to Claude and generated a quick Python script to crawl the Windows headers and emit enums.
The generated script:
#!/usr/bin/env python3
"""
win_defines_to_enums.py
Strict extractor:
- Captures ALL object-like #defines
- Keeps first definition (prevents redefinition corruption)
- Safely evaluates integer constant expressions
- Groups by prefix
- Ensures NO duplicate values exist in any enum
- Emits strictly valid C identifiers
- Emits header source comments (per enum + per member)
"""
from __future__ import annotations
import argparse
import logging
import os
import re
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple
LOG = logging.getLogger("win_defines_to_enums")
# -----------------------------------------------------------------------------
# Robust #define matcher
# -----------------------------------------------------------------------------
_RE_DEFINE = re.compile(
r"^[ \t]*#\s*define[ \t]+"
r"(?P<name>[A-Za-z_]\w*)"
r"(?!\()"
r"[ \t]+"
r"(?P<body>.+?)"
r"(?:[ \t]+//.*)?"
r"(?:[ \t]+/\*.*?\*/)?"
r"[ \t]*$",
re.MULTILINE,
)
_RE_TOKEN = re.compile(
r"(?:"
r"0[xX][0-9a-fA-F]+[uUlL]*"
r"|0[bB][01]+[uUlL]*"
r"|0[0-7]+[uUlL]*"
r"|[1-9][0-9]*[uUlL]*"
r"|0[uUlL]*"
r"|[A-Za-z_]\w*"
r"|<<|>>"
r"|[+\-*/&|^~()%<>!,]"
r")"
)
_RE_INT_SUFFIX = re.compile(r"^(.*?)[uUlL]+$")
_RE_CAST = re.compile(
r"\(\s*(?:unsigned\s+)?[A-Z_][A-Z_0-9]*\s*\)"
r"(?=\s*[0-9A-Za-z_(~\-+])"
)
_RE_INVALID_C_CHAR = re.compile(r"[^A-Za-z0-9_]")
_RE_MULTI_UNDERSCORE = re.compile(r"_+")
_MAX_DEPTH = 64
# -----------------------------------------------------------------------------
# Safe evaluator
# -----------------------------------------------------------------------------
class EvalError(Exception):
pass
class SafeEvaluator:
def __init__(self, resolved: Dict[str, int]):
self.resolved = resolved
self.tokens: List[str] = []
self.pos = 0
self.depth = 0
def evaluate(self, body: str) -> int:
body = _RE_CAST.sub("", body)
self.tokens = _RE_TOKEN.findall(body)
self.pos = 0
self.depth = 0
val = self._expr()
if self.pos != len(self.tokens):
raise EvalError()
return val & 0xFFFFFFFFFFFFFFFF
def _peek(self):
return self.tokens[self.pos] if self.pos < len(self.tokens) else None
def _consume(self):
tok = self._peek()
if tok is None:
raise EvalError()
self.pos += 1
return tok
def _check(self):
self.depth += 1
if self.depth > _MAX_DEPTH:
raise EvalError()
def _expr(self):
self._check()
return self._bitor()
def _bitor(self):
v = self._bitand()
while self._peek() == "|":
self._consume()
v |= self._bitand()
return v
def _bitand(self):
v = self._shift()
while self._peek() == "&":
self._consume()
v &= self._shift()
return v
def _shift(self):
v = self._add()
while self._peek() in ("<<", ">>"):
op = self._consume()
rhs = self._add()
v = v << rhs if op == "<<" else v >> rhs
return v
def _add(self):
v = self._mul()
while self._peek() in ("+", "-"):
op = self._consume()
rhs = self._mul()
v = v + rhs if op == "+" else v - rhs
return v
def _mul(self):
v = self._unary()
while self._peek() in ("*", "/", "%"):
op = self._consume()
rhs = self._unary()
if op == "*":
v *= rhs
elif op == "/":
v = int(v / rhs)
else:
v %= rhs
return v
def _unary(self):
tok = self._peek()
if tok == "~":
self._consume()
return ~self._unary()
if tok == "-":
self._consume()
return -self._unary()
return self._primary()
def _primary(self):
tok = self._consume()
if tok == "(":
val = self._expr()
if self._consume() != ")":
raise EvalError()
return val
if re.fullmatch(r"[A-Za-z_]\w*", tok):
if tok in self.resolved:
return self.resolved[tok]
raise EvalError()
m = _RE_INT_SUFFIX.match(tok)
if m:
tok = m.group(1)
return int(tok, 0)
# -----------------------------------------------------------------------------
# Identifier sanitization
# -----------------------------------------------------------------------------
def sanitize_identifier(name: str) -> str:
name = _RE_INVALID_C_CHAR.sub("_", name)
name = _RE_MULTI_UNDERSCORE.sub("_", name)
name = name.strip("_")
if not name:
name = "UNKNOWN"
if name[0].isdigit():
name = f"N_{name}"
if name.startswith("__"):
name = "N" + name
elif name.startswith("_") and len(name) > 1 and name[1].isupper():
name = "N" + name
return name
# -----------------------------------------------------------------------------
# Scan headers
# -----------------------------------------------------------------------------
def scan_defines(dirs: List[Path]) -> Dict[str, Tuple[str, Path]]:
raw = {}
for root in dirs:
for dirpath, _, files in os.walk(root):
for fname in files:
if not fname.endswith((".h", ".hpp", ".hh", ".inl")):
continue
path = Path(dirpath) / fname
try:
text = path.read_text(encoding="utf-8", errors="replace")
except OSError:
continue
text = text.replace("\\\n", " ")
for m in _RE_DEFINE.finditer(text):
name = m.group("name")
body = m.group("body").strip()
if body.startswith(("\"", "L\"", "//", "/*")):
continue
if name not in raw:
raw[name] = (body, path)
return raw
# -----------------------------------------------------------------------------
# Resolve
# -----------------------------------------------------------------------------
def resolve_defines(raw: Dict[str, Tuple[str, Path]]):
resolved = {}
origin = {}
pending = dict(raw)
for _ in range(32):
evaluator = SafeEvaluator(resolved)
progress = False
new_pending = {}
for name, (body, path) in pending.items():
try:
resolved[name] = evaluator.evaluate(body)
origin[name] = path
progress = True
except Exception:
new_pending[name] = (body, path)
if not progress:
break
pending = new_pending
return resolved, origin
# -----------------------------------------------------------------------------
# Group by prefix
# -----------------------------------------------------------------------------
def group_defines(resolved, origin):
groups = defaultdict(dict)
for name, value in resolved.items():
if "_" not in name:
continue
prefix = name.split("_")[0]
groups[prefix][name] = (value, origin[name])
return dict(groups)
# -----------------------------------------------------------------------------
# Emit header
# -----------------------------------------------------------------------------
def emit(groups):
lines = []
lines.append("#ifndef WINDOWS_DEFINES_ENUMS_H")
lines.append("#define WINDOWS_DEFINES_ENUMS_H")
lines.append("")
used_enum_types = set()
for gname in sorted(groups):
enum_type = sanitize_identifier(f"{gname}_ENUM")
base_type = enum_type
counter = 2
while enum_type in used_enum_types:
enum_type = f"{base_type}_{counter}"
counter += 1
used_enum_types.add(enum_type)
members = groups[gname]
source_files = sorted({str(p.name) for _, p in members.values()})
lines.append("/* ========================================================================")
lines.append(f" Enum Group: {gname}")
lines.append(" Generated from:")
for sf in source_files:
lines.append(f" - {sf}")
lines.append(" ======================================================================== */")
lines.append("")
lines.append(f"typedef enum {enum_type} {{")
used_members = set()
for name, (value, path) in sorted(
members.items(), key=lambda kv: (kv[1][0], kv[0])
):
member = sanitize_identifier(name)
base_member = member
counter = 2
while member in used_members:
member = f"{base_member}_{counter}"
counter += 1
used_members.add(member)
lines.append(f" /* {path.name} */")
lines.append(f" {member} = 0x{value:X},")
lines.append("")
lines.append(f"}} {enum_type};")
lines.append("")
lines.append("#endif")
return "\n".join(lines)
# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------
def main():
parser = argparse.ArgumentParser()
parser.add_argument("dirs", nargs="+", type=Path)
parser.add_argument("-o", "--output", type=Path)
args = parser.parse_args()
raw = scan_defines(args.dirs)
resolved, origin = resolve_defines(raw)
grouped = group_defines(resolved, origin)
header = emit(grouped)
if args.output:
args.output.write_text(header, encoding="utf-8")
else:
print(header)
if __name__ == "__main__":
main()
The script was executed as follows:
python gen_enums.py -o windows_enums.h "C:\\Program Files (x86)\\Windows Kits\\10\\Include\\10.0.26100.0\\um" "C:\\Program Files (x86)\\Windows Kits\\10\\Include\\10.0.26100.0\\shared" "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Tools\\MSVC\\14.50.35717\\include"
This produced a header: windows_enums.h.
Import Errors and Cleanup
Attempting to import the generated header into Binary Ninja immediately surfaced a few compiler errors:
error: /home/birk/Downloads/windows_enums(1).h:15004:5 redefinition of enumerator 'BITS_PER_BYTE'
error: /home/birk/Downloads/windows_enums(1).h:121110:5 redefinition of enumerator 'FACILITY_WINDOWS_STORE'
error: /home/birk/Downloads/windows_enums(1).h:289439:3 redefinition of 'TMT_ENUM' as different kind of symbol
3 errors generated.
These were straightforward: duplicate macro expansions across the SDK. The fix was manual deduplication: removing the conflicting definitions. Now we’re left with 7.1 MiB file: windows_enums.h, which can be imported in Binary Ninja under: Analysis->Import Header File.