Links:

Eliminating Magic Numbers in Binary Ninja

While working on Part 2: Reversing the File Encryption, I ran into an avoidable and time-consuming issue: several hardcoded constants in the target binary had no associated enums.

Binary Ninja supports enums, but it does not automatically convert #define constants from imported headers into enums. That’s a problem when reversing Windows binaries, because a significant portion of meaningful symbolic constants in the Windows SDK are defined as preprocessor macros rather than enum members. As a result, many hardcoded constants in the disassembly remain raw integers instead of resolving to descriptive names.

In this post, we’ll fix that by extracting #define constants from the Windows headers and transforming them into enums that Binary Ninja can consume.

Automating the Extraction

Rather than writing a parser myself, I delegated the task to Claude and generated a quick Python script to crawl the Windows headers and emit enums.

The generated script:

#!/usr/bin/env python3
"""
win_defines_to_enums.py

Strict extractor:
- Captures ALL object-like #defines
- Keeps first definition (prevents redefinition corruption)
- Safely evaluates integer constant expressions
- Groups by prefix
- Ensures NO duplicate member names exist in any enum
- Emits strictly valid C identifiers
- Emits header source comments (per enum + per member)
"""

from __future__ import annotations

import argparse
import logging
import os
import re
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple

LOG = logging.getLogger("win_defines_to_enums")

# -----------------------------------------------------------------------------
# Robust #define matcher
# -----------------------------------------------------------------------------

# One object-like "#define NAME body" per line.  The name must be followed by
# whitespace, so function-like macros ("NAME(") never match.  A trailing
# "// ..." or "/* ... */" comment that is preceded by whitespace is left out
# of the captured body.
_RE_DEFINE = re.compile(
    r"^[ \t]*#\s*define[ \t]+"
    r"(?P<name>[A-Za-z_]\w*)"
    r"(?!\()"
    r"[ \t]+"
    r"(?P<body>.+?)"
    r"(?:[ \t]+//.*)?"
    r"(?:[ \t]+/\*.*?\*/)?"
    r"[ \t]*$",
    re.MULTILINE,
)

# Lexer for macro bodies: integer literals (hex, binary, octal, decimal, each
# with optional u/U/l/L suffixes), identifiers, shift operators and
# single-character operators.  The final "\S" alternative turns any other
# non-space character into a token of its own, so the parser rejects it
# instead of the lexer silently dropping it (which made "'A'" look like the
# identifier A and "5;" look like 5).
_RE_TOKEN = re.compile(
    r"(?:"
    r"0[xX][0-9a-fA-F]+[uUlL]*"
    r"|0[bB][01]+[uUlL]*"
    r"|0[0-7]+[uUlL]*"
    r"|[1-9][0-9]*[uUlL]*"
    r"|0[uUlL]*"
    r"|[A-Za-z_]\w*"
    r"|<<|>>"
    r"|[+\-*/&|^~()%<>!,]"
    r"|\S"
    r")"
)

# Splits an integer literal from its trailing u/U/l/L suffix letters.
_RE_INT_SUFFIX = re.compile(r"^(.*?)[uUlL]+$")

# Whole-token identifier test used by the parser.
_RE_IDENT = re.compile(r"[A-Za-z_]\w*")

# A parenthesised type name directly in front of an operand, e.g. "(DWORD)",
# "(unsigned LONG)", "(int)" or "(unsigned long)".  The "type" group lets the
# evaluator keep "(FOO)" when FOO is a known constant rather than a type.
_RE_CAST = re.compile(
    r"\(\s*(?P<type>"
    r"(?:(?:unsigned|signed|short|long|int|char|__int(?:8|16|32|64))\b\s*)+"
    r"|(?:unsigned\s+)?[A-Z_][A-Z_0-9]*"
    r")\s*\)"
    r"(?=\s*[0-9A-Za-z_(~\-+])"
)

# Character classes used when turning macro names into C identifiers.
_RE_INVALID_C_CHAR = re.compile(r"[^A-Za-z0-9_]")
_RE_MULTI_UNDERSCORE = re.compile(r"_+")

# Maximum nesting depth of parenthesised sub-expressions.
_MAX_DEPTH = 64

# Largest accepted left-shift count; keeps a stray huge constant from
# allocating an enormous Python integer.
_MAX_SHIFT = 1024


# -----------------------------------------------------------------------------
# Safe evaluator
# -----------------------------------------------------------------------------

class EvalError(Exception):
    """Raised when a macro body is not an evaluable integer expression."""


class SafeEvaluator:
    """Recursive-descent evaluator for C integer constant expressions.

    Supported, from lowest to highest precedence: ``|``, ``^``, ``&``,
    ``<<``/``>>``, ``+``/``-``, ``*``/``/``/``%``, unary ``~``/``-``/``+``,
    parentheses, integer literals and names found in ``resolved``.
    Division and remainder truncate toward zero as in C.  Every failure
    (unknown name, stray character, malformed literal, division by zero,
    invalid shift count, excessive nesting) is reported as ``EvalError``.
    """

    def __init__(self, resolved: Dict[str, int]):
        # Shared, caller-owned map of already-evaluated macro values; it is
        # read on every identifier lookup, so later additions are visible.
        self.resolved = resolved
        self.tokens: List[str] = []
        self.pos = 0
        self.depth = 0

    def evaluate(self, body: str) -> int:
        """Evaluate ``body`` and return the result modulo 2**64.

        Negative results therefore come back as their 64-bit
        two's-complement bit pattern.

        Raises:
            EvalError: if ``body`` cannot be evaluated.
        """
        body = _RE_CAST.sub(self._strip_cast, body)
        self.tokens = _RE_TOKEN.findall(body)
        self.pos = 0
        self.depth = 0
        val = self._expr()
        if self.pos != len(self.tokens):
            raise EvalError(f"unexpected token {self.tokens[self.pos]!r}")
        return val & 0xFFFFFFFFFFFFFFFF

    def _strip_cast(self, match):
        """Drop a cast, but keep "(NAME)" when NAME is a known constant.

        "(FOO) - 1" is arithmetic when FOO is a resolved macro; removing the
        parenthesised name would silently turn it into "-1".
        """
        if match.group("type").strip() in self.resolved:
            return match.group(0)
        return ""

    def _peek(self):
        return self.tokens[self.pos] if self.pos < len(self.tokens) else None

    def _consume(self):
        tok = self._peek()
        if tok is None:
            raise EvalError("unexpected end of expression")
        self.pos += 1
        return tok

    def _check(self):
        self.depth += 1
        if self.depth > _MAX_DEPTH:
            raise EvalError("expression nested too deeply")

    def _expr(self):
        self._check()
        try:
            return self._bitor()
        finally:
            # Leave the level again so ``depth`` measures nesting, not the
            # total number of parenthesised groups seen so far.
            self.depth -= 1

    def _bitor(self):
        v = self._bitxor()
        while self._peek() == "|":
            self._consume()
            v |= self._bitxor()
        return v

    def _bitxor(self):
        v = self._bitand()
        while self._peek() == "^":
            self._consume()
            v ^= self._bitand()
        return v

    def _bitand(self):
        v = self._shift()
        while self._peek() == "&":
            self._consume()
            v &= self._shift()
        return v

    def _shift(self):
        v = self._add()
        while self._peek() in ("<<", ">>"):
            op = self._consume()
            rhs = self._add()
            if rhs < 0:
                raise EvalError("negative shift count")
            if op == "<<":
                if rhs > _MAX_SHIFT:
                    raise EvalError("shift count too large")
                v <<= rhs
            else:
                v >>= rhs
        return v

    def _add(self):
        v = self._mul()
        while self._peek() in ("+", "-"):
            op = self._consume()
            rhs = self._mul()
            v = v + rhs if op == "+" else v - rhs
        return v

    def _mul(self):
        v = self._unary()
        while self._peek() in ("*", "/", "%"):
            op = self._consume()
            rhs = self._unary()
            if op == "*":
                v *= rhs
                continue
            if rhs == 0:
                raise EvalError("division by zero")
            # C semantics: the quotient truncates toward zero and the
            # remainder takes the sign of the dividend.  Pure integer
            # arithmetic keeps 64-bit operands exact (no float round trip).
            quotient = abs(v) // abs(rhs)
            if (v < 0) != (rhs < 0):
                quotient = -quotient
            v = quotient if op == "/" else v - rhs * quotient
        return v

    def _unary(self):
        # Collect prefix operators iteratively so a long run of them cannot
        # exhaust the interpreter's recursion limit.
        prefix = []
        while self._peek() in ("~", "-", "+"):
            prefix.append(self._consume())
        v = self._primary()
        for op in reversed(prefix):
            if op == "~":
                v = ~v
            elif op == "-":
                v = -v
        return v

    def _primary(self):
        tok = self._consume()

        if tok == "(":
            val = self._expr()
            if self._consume() != ")":
                raise EvalError("expected ')'")
            return val

        if _RE_IDENT.fullmatch(tok):
            if tok in self.resolved:
                return self.resolved[tok]
            raise EvalError(f"unresolved identifier {tok!r}")

        return self._parse_int(tok)

    @staticmethod
    def _parse_int(tok):
        """Convert one C integer literal token to ``int``."""
        m = _RE_INT_SUFFIX.match(tok)
        digits = m.group(1) if m else tok
        try:
            # A leading zero means octal in C, but int(..., 0) rejects
            # "0777", so that form is parsed with an explicit base.
            if len(digits) > 1 and digits[0] == "0" and digits[1] in "01234567":
                return int(digits, 8)
            return int(digits, 0)
        except ValueError:
            raise EvalError(f"not an integer literal: {tok!r}") from None


# -----------------------------------------------------------------------------
# Identifier sanitization
# -----------------------------------------------------------------------------

def sanitize_identifier(name: str) -> str:
    """Return ``name`` rewritten as a plain C identifier.

    Every character outside ``[A-Za-z0-9_]`` becomes an underscore, runs of
    underscores collapse to one, and leading/trailing underscores are
    removed.  An empty result becomes ``"UNKNOWN"``; a result starting with
    a digit is prefixed with ``"N_"``.

    Because leading underscores are always stripped, the result can never
    start with ``_``; the former reserved-identifier branches that tested
    for that were unreachable and have been removed.  Output is unchanged.
    """
    name = _RE_INVALID_C_CHAR.sub("_", name)
    name = _RE_MULTI_UNDERSCORE.sub("_", name).strip("_")

    if not name:
        return "UNKNOWN"

    if name[0].isdigit():
        # A C identifier may not begin with a digit.
        return f"N_{name}"

    return name


# -----------------------------------------------------------------------------
# Scan headers
# -----------------------------------------------------------------------------

def scan_defines(dirs: List[Path]) -> Dict[str, Tuple[str, Path]]:
    """Collect object-like ``#define`` lines from header files under ``dirs``.

    Directories are walked recursively.  Files ending in ``.h``, ``.hpp``,
    ``.hh`` or ``.inl`` (compared case-insensitively) are read as UTF-8 with
    undecodable bytes replaced, and backslash-newline continuations are
    joined before matching.

    Only the first definition of each name is kept.  Directories and files
    are visited in sorted order, so which definition counts as "first" does
    not depend on the order the filesystem happens to list entries in.

    Returns:
        Mapping of macro name to ``(body, path of the defining header)``.
        Bodies that start with a string literal or a comment are skipped.
    """
    header_suffixes = (".h", ".hpp", ".hh", ".inl")
    raw: Dict[str, Tuple[str, Path]] = {}

    for root in dirs:
        for dirpath, dirnames, files in os.walk(root):
            # Sorting in place makes os.walk descend in a stable order.
            dirnames.sort()

            for fname in sorted(files):
                if not fname.lower().endswith(header_suffixes):
                    continue

                path = Path(dirpath) / fname

                try:
                    text = path.read_text(encoding="utf-8", errors="replace")
                except OSError as exc:
                    # Still best-effort, but no longer silent.
                    LOG.warning("skipping unreadable header %s: %s", path, exc)
                    continue

                text = text.replace("\\\n", " ")

                for m in _RE_DEFINE.finditer(text):
                    name = m.group("name")
                    if name in raw:
                        continue

                    body = m.group("body").strip()
                    if body.startswith(("\"", "L\"", "//", "/*")):
                        continue

                    raw[name] = (body, path)

    return raw


# -----------------------------------------------------------------------------
# Resolve
# -----------------------------------------------------------------------------

def resolve_defines(raw: Dict[str, Tuple[str, Path]]):
    """Evaluate macro bodies to integers, resolving references in passes.

    Each pass tries every still-unresolved macro against the values known so
    far.  A macro whose evaluation raises is deferred to the next pass.  The
    loop stops after a pass that resolves nothing, or after 32 passes.

    Returns:
        ``(values, sources)``: macro name to integer value, and macro name
        to the path recorded for it in ``raw``.  Macros that never evaluate
        are absent from both.
    """
    values = {}
    sources = {}
    todo = dict(raw)
    passes_left = 32

    while passes_left > 0:
        passes_left -= 1
        evaluator = SafeEvaluator(values)
        deferred = {}

        for macro, (expr, src) in todo.items():
            try:
                result = evaluator.evaluate(expr)
            except Exception:
                deferred[macro] = (expr, src)
                continue
            values[macro] = result
            sources[macro] = src

        # Nothing left the queue during this pass: a fixed point is reached.
        if len(deferred) == len(todo):
            break

        todo = deferred

    return values, sources


# -----------------------------------------------------------------------------
# Group by prefix
# -----------------------------------------------------------------------------

def group_defines(resolved, origin):
    """Bucket resolved macros by the text before their first underscore.

    Names without any underscore are left out.  A name that starts with an
    underscore lands in the bucket keyed by the empty string.

    Returns:
        A plain dict: prefix -> {macro name: (value, origin[macro name])}.
    """
    buckets = {}
    for macro, val in resolved.items():
        head, sep, _tail = macro.partition("_")
        if not sep:
            continue
        buckets.setdefault(head, {})[macro] = (val, origin[macro])
    return buckets


# -----------------------------------------------------------------------------
# Emit header
# -----------------------------------------------------------------------------

def _unique_identifier(base, *taken):
    """Return ``base``, or the first ``base_N`` (N = 2, 3, ...) that is in none of ``taken``."""
    candidate = base
    counter = 2
    while any(candidate in names for names in taken):
        candidate = f"{base}_{counter}"
        counter += 1
    return candidate


def emit(groups):
    """Render ``groups`` as a C header containing one typedef'd enum per group.

    Args:
        groups: prefix -> {macro name: (value, source path)}.

    Returns:
        The header text (include guard, one commented enum per non-empty
        group, members ordered by value and then name), without a trailing
        newline.

    In C, enumerators and typedef names share one namespace across the whole
    translation unit.  Identifiers are therefore kept unique across the whole
    header rather than per enum:

    * an enum type name never equals any enumerator (a group ``TMT`` that
      contains a macro ``TMT_ENUM`` gets the type ``TMT_ENUM_2``);
    * a macro whose name had to be altered by sanitisation never takes the
      name of a macro that is emitted verbatim elsewhere (``_FOO_X`` becomes
      ``FOO_X_2`` when a real ``FOO_X`` exists).
    """
    # Sanitised spelling of every macro name, computed once.
    clean = {
        name: sanitize_identifier(name)
        for members in groups.values()
        for name in members
    }
    # Names emitted exactly as spelled in the SDK get first claim.
    verbatim = {name for name, ident in clean.items() if ident == name}
    # Every spelling an enumerator may ask for; type names must avoid these.
    reserved = set(clean.values())
    # Identifiers already written; the include-guard macro is off limits too,
    # since the preprocessor would expand an enumerator of that name away.
    used = {"WINDOWS_DEFINES_ENUMS_H"}

    lines = [
        "#ifndef WINDOWS_DEFINES_ENUMS_H",
        "#define WINDOWS_DEFINES_ENUMS_H",
        "",
    ]

    for gname in sorted(groups):
        members = groups[gname]
        if not members:
            # "enum X { }" is not valid C.
            continue

        enum_type = _unique_identifier(
            sanitize_identifier(f"{gname}_ENUM"), used, reserved
        )
        used.add(enum_type)

        source_files = sorted({path.name for _, path in members.values()})

        lines.append("/* ========================================================================")
        lines.append(f"   Enum Group: {gname}")
        lines.append("   Generated from:")
        for sf in source_files:
            lines.append(f"     - {sf}")
        lines.append("   ======================================================================== */")
        lines.append("")

        lines.append(f"typedef enum {enum_type} {{")

        for name, (value, path) in sorted(
            members.items(), key=lambda kv: (kv[1][0], kv[0])
        ):
            ident = clean[name]
            if ident == name:
                member = _unique_identifier(ident, used)
            else:
                member = _unique_identifier(ident, used, verbatim)
            used.add(member)

            lines.append(f"    /* {path.name} */")
            lines.append(f"    {member} = 0x{value:X},")
            lines.append("")

        lines.append(f"}} {enum_type};")
        lines.append("")

    lines.append("#endif")
    return "\n".join(lines)


# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------

def main():
    """Command-line entry point: scan headers, resolve, group and emit enums.

    Exits with an argparse usage error if any given path is not a directory.
    Previously a mistyped SDK path was skipped without notice and produced
    an incomplete header.  The header goes to ``--output`` when given,
    otherwise to standard output.
    """
    parser = argparse.ArgumentParser(
        description="Convert object-like #define constants into C enums."
    )
    parser.add_argument(
        "dirs", nargs="+", type=Path,
        help="header directories to scan recursively",
    )
    parser.add_argument(
        "-o", "--output", type=Path,
        help="file to write the generated header to (default: stdout)",
    )
    args = parser.parse_args()

    missing = [str(d) for d in args.dirs if not d.is_dir()]
    if missing:
        parser.error("not a directory: " + ", ".join(missing))

    raw = scan_defines(args.dirs)
    resolved, origin = resolve_defines(raw)
    grouped = group_defines(resolved, origin)
    header = emit(grouped)

    if args.output:
        # End the file with a newline, as print() already does on stdout.
        if not header.endswith("\n"):
            header += "\n"
        args.output.write_text(header, encoding="utf-8")
    else:
        print(header)


if __name__ == "__main__":
    main()

The script was executed as follows:

python gen_enums.py -o windows_enums.h "C:\\Program Files (x86)\\Windows Kits\\10\\Include\\10.0.26100.0\\um" "C:\\Program Files (x86)\\Windows Kits\\10\\Include\\10.0.26100.0\\shared" "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Tools\\MSVC\\14.50.35717\\include"

This produced a header: windows_enums.h.

Import Errors and Cleanup

Attempting to import the generated header into Binary Ninja immediately surfaced a few compiler errors:

error: /home/birk/Downloads/windows_enums(1).h:15004:5 redefinition of enumerator 'BITS_PER_BYTE'
error: /home/birk/Downloads/windows_enums(1).h:121110:5 redefinition of enumerator 'FACILITY_WINDOWS_STORE'
error: /home/birk/Downloads/windows_enums(1).h:289439:3 redefinition of 'TMT_ENUM' as different kind of symbol
3 errors generated.

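These were straightforward: duplicate identifiers produced from macros across the SDK. Two enumerators (BITS_PER_BYTE and FACILITY_WINDOWS_STORE) were each emitted twice, and the enum type name TMT_ENUM collided with an enumerator of the same name. The fix was manual deduplication: removing the conflicting definitions. Now we’re left with a 7.1 MiB file, windows_enums.h, which can be imported into Binary Ninja under Analysis -> Import Header File.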