set_bits(loc32, 10, value, shift, 12);
}
+// Relaxable 12-bit low part of an absolute address.
+// Usually paired with patch_aarch64_21rx (below).
+// Called on its own, this performs no relaxation: it simply forwards to
+// patch_aarch64_12 with the same location and value.
+void
+patch_aarch64_12x(unsigned char *location, uint64_t value)
+{
+ patch_aarch64_12(location, value);
+}
+
// 16-bit low part of an absolute address.
void
patch_aarch64_16a(unsigned char *location, uint64_t value)
set_bits(loc32, 5, value, 2, 19);
}
+// Relaxable 21-bit count of pages between this page and an absolute address's
+// page. Usually paired with patch_aarch64_12x (above).
+// Called on its own, this performs no relaxation: it simply forwards to
+// patch_aarch64_21r with the same location and value.
+void
+patch_aarch64_21rx(unsigned char *location, uint64_t value)
+{
+ patch_aarch64_21r(location, value);
+}
+
// 21-bit relative branch.
void
patch_aarch64_19r(unsigned char *location, uint64_t value)
set_bits(loc32, 0, value, 2, 26);
}
+// A pair of patch_aarch64_21rx and patch_aarch64_12x.
+// location_a is the adrp instruction and location_b is the ldr that consumes
+// its result. value is dereferenced as a pointer to the 64-bit constant that
+// the pair loads. The relaxations below are tried in order; if none applies,
+// the two instructions are patched individually.
+void
+patch_aarch64_33rx(unsigned char *location_a, unsigned char *location_b, uint64_t value)
+{
+    uint32_t *loc32_a = (uint32_t *)location_a;
+    uint32_t *loc32_b = (uint32_t *)location_b;
+    // Try to relax the pair of GOT loads into an immediate value:
+    assert(IS_AARCH64_ADRP(*loc32_a));
+    assert(IS_AARCH64_LDR_OR_STR(*loc32_b));
+    // adrp's destination register:
+    unsigned char reg = get_bits(*loc32_a, 0, 5);
+    // There should be only one register involved, so both of the ldr's
+    // register fields (read from the *second* instruction) must equal it:
+    assert(reg == get_bits(*loc32_b, 0, 5));  // ldr's output register.
+    assert(reg == get_bits(*loc32_b, 5, 5));  // ldr's input register.
+    uint64_t relaxed = *(uint64_t *)value;
+    if (relaxed < (1UL << 16)) {
+        // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; nop
+        *loc32_a = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg;
+        *loc32_b = 0xD503201F;
+        return;
+    }
+    if (relaxed < (1ULL << 32)) {
+        // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; movk reg, YYY
+        *loc32_a = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg;
+        *loc32_b = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | reg;
+        return;
+    }
+    // Signed page distance from the adrp to the loaded constant:
+    int64_t page_delta = (relaxed >> 12) - ((uintptr_t)location_a >> 12);
+    if (page_delta >= -(1L << 20) &&
+        page_delta < (1L << 20))
+    {
+        // adrp reg, AAA; ldr reg, [reg + BBB] -> adrp reg, AAA; add reg, reg, BBB
+        patch_aarch64_21rx(location_a, relaxed);
+        *loc32_b = 0x91000000 | (get_bits(relaxed, 0, 12) << 10) | (reg << 5) | reg;
+        return;
+    }
+    // Byte distance from the adrp to the slot that holds the constant:
+    relaxed = value - (uintptr_t)location_a;
+    if ((relaxed & 0x3) == 0 &&
+        (int64_t)relaxed >= -(1L << 19) &&
+        (int64_t)relaxed < (1L << 19))
+    {
+        // adrp reg, AAA; ldr reg, [reg + BBB] -> ldr reg, XXX; nop
+        *loc32_a = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | reg;
+        *loc32_b = 0xD503201F;
+        return;
+    }
+    // Couldn't do it. Just patch the two instructions normally:
+    patch_aarch64_21rx(location_a, value);
+    patch_aarch64_12x(location_b, value);
+}
+
// Relaxable 32-bit relative address.
void
patch_x86_64_32rx(unsigned char *location, uint64_t value)
RETURN = enum.auto()
SMALL_CONST_1 = enum.auto()
SMALL_CONST_2 = enum.auto()
+ SMALL_CONST_MASK = enum.auto()
+ LARGE_CONST_1 = enum.auto()
+ LARGE_CONST_2 = enum.auto()
OTHER = enum.auto()
kind: InstructionKind
name: str
text: str
+ register: str | None
target: str | None
def is_branch(self) -> bool:
def update_target(self, target: str) -> "Instruction":
assert self.target is not None
return Instruction(
- self.kind, self.name, self.text.replace(self.target, target), target
+ self.kind,
+ self.name,
+ self.text.replace(self.target, target),
+ self.register,
+ target,
)
def update_name_and_target(self, name: str, target: str) -> "Instruction":
self.kind,
name,
self.text.replace(self.name, name).replace(self.target, target),
+ self.register,
target,
)
globals: set[str] = dataclasses.field(default_factory=set)
_re_small_const_1 = _RE_NEVER_MATCH
_re_small_const_2 = _RE_NEVER_MATCH
+ _re_small_const_mask = _RE_NEVER_MATCH
+ _re_large_const_1 = _RE_NEVER_MATCH
+ _re_large_const_2 = _RE_NEVER_MATCH
const_reloc = "<Not supported>"
_frame_pointer_modify: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH
+ label_index: int = 0
def __post_init__(self) -> None:
# Split the code into a linked list of basic blocks. A basic block is an
def _parse_instruction(self, line: str) -> Instruction:
target = None
+ reg = None
if match := self._re_branch.match(line):
target = match["target"]
name = match["instruction"]
elif match := self._re_small_const_1.match(line):
target = match["value"]
name = match["instruction"]
+ reg = match["register"]
kind = InstructionKind.SMALL_CONST_1
elif match := self._re_small_const_2.match(line):
target = match["value"]
name = match["instruction"]
+ reg = match["register"]
kind = InstructionKind.SMALL_CONST_2
+ elif match := self._re_small_const_mask.match(line):
+ target = match["value"]
+ name = match["instruction"]
+ reg = match["register"]
+ if reg.startswith("w"):
+ reg = "x" + reg[1:]
+ kind = InstructionKind.SMALL_CONST_MASK
+ elif match := self._re_large_const_1.match(line):
+ target = match["value"]
+ name = match["instruction"]
+ reg = match["register"]
+ kind = InstructionKind.LARGE_CONST_1
+ elif match := self._re_large_const_2.match(line):
+ target = match["value"]
+ name = match["instruction"]
+ reg = match["register"]
+ kind = InstructionKind.LARGE_CONST_2
else:
name, *_ = line.split(" ")
kind = InstructionKind.OTHER
- return Instruction(kind, name, line, target)
+ return Instruction(kind, name, line, reg, target)
def _invert_branch(self, inst: Instruction, target: str) -> Instruction | None:
assert inst.is_branch()
name = target[len(self.symbol_prefix) :]
label = f"{self.symbol_prefix}{reloc}_JIT_RELOCATION_{name}_JIT_RELOCATION_{index}:"
block.instructions[-1] = Instruction(
- InstructionKind.OTHER, "", label, None
+ InstructionKind.OTHER, "", label, None, None
)
block.instructions.append(branch.update_target("0"))
- def _make_temp_label(self, index: int) -> Instruction:
- marker = f"jit_temp_{index}:"
- return Instruction(InstructionKind.OTHER, "", marker, None)
-
def _fixup_constants(self) -> None:
- if not self.supports_small_constants:
- return
- index = 0
- for block in self._blocks():
- fixed: list[Instruction] = []
- small_const_index = -1
- for inst in block.instructions:
- if inst.kind == InstructionKind.SMALL_CONST_1:
- marker = f"jit_pending_{inst.target}{index}:"
- fixed.append(self._make_temp_label(index))
- index += 1
- small_const_index = len(fixed)
- fixed.append(inst)
- elif inst.kind == InstructionKind.SMALL_CONST_2:
- if small_const_index < 0:
- fixed.append(inst)
- continue
- small_const_1 = fixed[small_const_index]
- if not self._small_consts_match(small_const_1, inst):
- small_const_index = -1
- fixed.append(inst)
- continue
- assert small_const_1.target is not None
- if small_const_1.target.endswith("16"):
- fixed[small_const_index] = self._make_temp_label(index)
- index += 1
- else:
- assert small_const_1.target.endswith("32")
- patch_kind, replacement = self._small_const_1(small_const_1)
- if replacement is not None:
- label = f"{self.const_reloc}{patch_kind}_JIT_RELOCATION_CONST{small_const_1.target[:-3]}_JIT_RELOCATION_{index}:"
- index += 1
- fixed[small_const_index - 1] = Instruction(
- InstructionKind.OTHER, "", label, None
- )
- fixed[small_const_index] = replacement
- patch_kind, replacement = self._small_const_2(inst)
- if replacement is not None:
- assert inst.target is not None
- label = f"{self.const_reloc}{patch_kind}_JIT_RELOCATION_CONST{inst.target[:-3]}_JIT_RELOCATION_{index}:"
- index += 1
- fixed.append(
- Instruction(InstructionKind.OTHER, "", label, None)
- )
- fixed.append(replacement)
- small_const_index = -1
- else:
- fixed.append(inst)
- block.instructions = fixed
-
- def _small_const_1(self, inst: Instruction) -> tuple[str, Instruction | None]:
- raise NotImplementedError()
-
- def _small_const_2(self, inst: Instruction) -> tuple[str, Instruction | None]:
- raise NotImplementedError()
-
- def _small_consts_match(self, inst1: Instruction, inst2: Instruction) -> bool:
- raise NotImplementedError()
+ "Fixup loading of constants. Overridden by OptimizerAArch64"
+ pass
def _validate(self) -> None:
for block in self._blocks():
supports_small_constants = True
_re_small_const_1 = re.compile(
- r"\s*(?P<instruction>adrp)\s+.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*"
+ r"\s*(?P<instruction>adrp)\s+(?P<register>x\d\d?),.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*"
)
_re_small_const_2 = re.compile(
- r"\s*(?P<instruction>ldr)\s+.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*"
+ r"\s*(?P<instruction>ldr)\s+(?P<register>x\d\d?),.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*"
+ )
+ _re_small_const_mask = re.compile(
+ r"\s*(?P<instruction>and)\s+[xw]\d\d?, *(?P<register>[xw]\d\d?).*(?P<value>0xffff)"
+ )
+ _re_large_const_1 = re.compile(
+ r"\s*(?P<instruction>adrp)\s+(?P<register>x\d\d?),.*:got:(?P<value>[_A-Za-z0-9]+).*"
+ )
+ _re_large_const_2 = re.compile(
+ r"\s*(?P<instruction>ldr)\s+(?P<register>x\d\d?),.*:got_lo12:(?P<value>[_A-Za-z0-9]+).*"
)
const_reloc = "CUSTOM_AARCH64_CONST"
_frame_pointer_modify = re.compile(r"\s*stp\s+x29.*")
- def _get_reg(self, inst: Instruction) -> str:
- _, rest = inst.text.split(inst.name)
- reg, *_ = rest.split(",")
- return reg.strip()
-
- def _small_const_1(self, inst: Instruction) -> tuple[str, Instruction | None]:
- assert inst.kind is InstructionKind.SMALL_CONST_1
- assert inst.target is not None
- if "16" in inst.target:
- return "", None
- pre, _ = inst.text.split(inst.name)
- return "16a", Instruction(
- InstructionKind.OTHER, "movz", f"{pre}movz {self._get_reg(inst)}, 0", None
+ def _make_temp_label(self, note: object = None) -> Instruction:
+ # Build a placeholder label instruction named "jit_temp_<label_index>:",
+ # or "jit_temp_<label_index>_<note>:" when a note is supplied.
+ # Every call consumes one value of self.label_index.
+ marker = f"jit_temp_{self.label_index}:"
+ if note is not None:
+ # Strip the trailing ":" so the note can be spliced in before it.
+ marker = f"{marker[:-1]}_{note}:"
+ self.label_index += 1
+ return Instruction(InstructionKind.OTHER, "", marker, None, None)
+
+ def _both_registers_same(self, inst: Instruction) -> bool:
+ # True when the instruction's recorded register name occurs exactly twice
+ # in its text. If the recorded name does not occur at all, its "w"-prefixed
+ # spelling is counted instead.
+ # NOTE(review): this is a plain substring count, so a name such as "x1"
+ # is also counted inside "x10" - confirm that is acceptable.
+ reg = inst.register
+ assert reg is not None
+ if reg not in inst.text:
+ reg = "w" + reg[1:]
+ return inst.text.count(reg) == 2
+
+ def _fixup_small_constant_pair(
+ self, output: list[Instruction], label_index: int, inst: Instruction
+ ) -> str | None:
+ # Try to fuse the instruction at output[label_index + 1] (the first half
+ # of the pair) with `inst` (the second half) into movz, plus movk for
+ # targets ending in "32". Returns the register on success. On failure a
+ # "# ..." marker line and the untouched `inst` are appended and None is
+ # returned.
+ first = output[label_index + 1]
+ reg = first.register
+ if reg is None or inst.register != reg:
+ output.append(
+ Instruction(InstructionKind.OTHER, "", "# registers differ", None, None)
+ )
+ output.append(inst)
+ return None
+ assert first.target is not None
+ if first.target != inst.target:
+ output.append(
+ Instruction(InstructionKind.OTHER, "", "# targets differ", None, None)
+ )
+ output.append(inst)
+ return None
+ if not self._both_registers_same(inst):
+ output.append(
+ Instruction(
+ InstructionKind.OTHER, "", "# not same register", None, None
+ )
+ )
+ output.append(inst)
+ return None
+ # Replace the first instruction with "movz <reg>, 0", keeping whatever
+ # text preceded its mnemonic.
+ pre, _ = first.text.split(first.name)
+ output[label_index + 1] = Instruction(
+ InstructionKind.OTHER,
+ "movz",
+ f"{pre}movz {reg}, 0",
+ reg,
+ None,
 )
-
- def _small_const_2(self, inst: Instruction) -> tuple[str, Instruction | None]:
- assert inst.kind is InstructionKind.SMALL_CONST_2
- assert inst.target is not None
- pre, _ = inst.text.split(inst.name)
- if "16" in inst.target:
- return "16a", Instruction(
- InstructionKind.OTHER,
- "movz",
- f"{pre}movz {self._get_reg(inst)}, 0",
- None,
+ # Overwrite the placeholder label in front of the movz with a "16a"
+ # relocation label.
+ label_text = f"{self.const_reloc}16a_JIT_RELOCATION_CONST{first.target[:-3]}_JIT_RELOCATION_{self.label_index}:"
+ self.label_index += 1
+ output[label_index] = Instruction(
+ InstructionKind.OTHER, "", label_text, None, None
+ )
+ assert first.target.endswith("16") or first.target.endswith("32")
+ # Targets ending in "32" additionally get a "16b" relocation label and a
+ # movk for the upper half. For targets ending in "16" nothing more is
+ # appended, so `inst` itself is dropped from the output.
+ if first.target.endswith("32"):
+ label_text = f"{self.const_reloc}16b_JIT_RELOCATION_CONST{first.target[:-3]}_JIT_RELOCATION_{self.label_index}:"
+ self.label_index += 1
+ output.append(
+ Instruction(InstructionKind.OTHER, "", label_text, None, None)
 )
- else:
- return "16b", Instruction(
- InstructionKind.OTHER,
- "movk",
- f"{pre}movk {self._get_reg(inst)}, 0, lsl #16",
- None,
+ pre, _ = inst.text.split(inst.name)
+ output.append(
+ Instruction(
+ InstructionKind.OTHER,
+ "movk",
+ f"{pre}movk {reg}, 0, lsl #16",
+ reg,
+ None,
+ )
 )
+ return reg
+
+ def may_use_reg(self, inst: Instruction, reg: str | None) -> bool:
+ "Return False if `reg` is not explicitly used by this instruction"
+ # The check is textual: both the "x" and "w" spellings of the register
+ # number are searched for as substrings of the instruction text.
+ # NOTE(review): a short name also matches inside a longer one (e.g. "w1"
+ # inside "w10"), which yields True - presumably the intended conservative
+ # answer; confirm.
+ if reg is None:
+ return False
+ assert reg.startswith("w") or reg.startswith("x")
+ xreg = f"x{reg[1:]}"
+ wreg = f"w{reg[1:]}"
+ if wreg in inst.text:
+ return True
+ if xreg in inst.text:
+ # Exclude false positives like 0x80 for x8
+ # Only occurrences not directly preceded by "0" count as real uses.
+ count = inst.text.count(xreg)
+ number_count = inst.text.count("0" + xreg)
+ return count > number_count
+ return False
+
+ def _fixup_large_constant_pair(
+ self, output: list[Instruction], label_index: int, inst: Instruction
+ ) -> None:
+ # Tag the instruction at output[label_index + 1] and `inst` as a pair by
+ # giving them "33a"/"33b" labels that share one "_JIT_PAIR_" index.
+ # Neither instruction is rewritten. If the registers or targets differ,
+ # `inst` is appended unchanged and no labels are emitted.
+ first = output[label_index + 1]
+ reg = first.register
+ if reg is None or inst.register != reg:
+ output.append(inst)
+ return
+ assert first.target is not None
+ if first.target != inst.target:
+ output.append(inst)
+ return
+ # The "33a" label overwrites the placeholder label before the first half.
+ label = f"{self.const_reloc}33a_JIT_PAIR_{first.target}_JIT_PAIR_{self.label_index}:"
+ output[label_index] = Instruction(InstructionKind.OTHER, "", label, None, None)
+ label = (
+ f"{self.const_reloc}33b_JIT_PAIR_{inst.target}_JIT_PAIR_{self.label_index}:"
+ )
+ # Both labels used the same index; advance it only once, afterwards.
+ self.label_index += 1
+ output.append(Instruction(InstructionKind.OTHER, "", label, None, None))
+ output.append(inst)
+
+ def _fixup_mask(self, output: list[Instruction], inst: Instruction) -> None:
+ # Emit the mask instruction only when it is not an in-place mask: if the
+ # register name appears twice in the instruction it is dropped from the
+ # output, otherwise it is kept as-is.
+ if self._both_registers_same(inst):
+ # Nop
+ pass
+ else:
+ output.append(inst)
- def _small_consts_match(self, inst1: Instruction, inst2: Instruction) -> bool:
- reg1 = self._get_reg(inst1)
- reg2 = self._get_reg(inst2)
- return reg1 == reg2
+ def _fixup_constants(self) -> None:
+ for block in self._blocks():
+ fixed: list[Instruction] = []
+ small_const_part: dict[str, int | None] = {}
+ small_const_whole: dict[str, str | None] = {}
+ large_const_part: dict[str, int | None] = {}
+ for inst in block.instructions:
+ if inst.kind == InstructionKind.SMALL_CONST_1:
+ assert inst.register is not None
+ small_const_part[inst.register] = len(fixed)
+ small_const_whole[inst.register] = None
+ large_const_part[inst.register] = None
+ fixed.append(self._make_temp_label(inst.register))
+ fixed.append(inst)
+ elif inst.kind == InstructionKind.SMALL_CONST_2:
+ assert inst.register is not None
+ index = small_const_part.get(inst.register)
+ small_const_part[inst.register] = None
+ if index is None:
+ fixed.append(inst)
+ continue
+ small_const_whole[inst.register] = self._fixup_small_constant_pair(
+ fixed, index, inst
+ )
+ small_const_part[inst.register] = None
+ elif inst.kind == InstructionKind.SMALL_CONST_MASK:
+ assert inst.register is not None
+ reg = small_const_whole.get(inst.register)
+ if reg is not None:
+ self._fixup_mask(fixed, inst)
+ else:
+ fixed.append(inst)
+ elif inst.kind == InstructionKind.LARGE_CONST_1:
+ assert inst.register is not None
+ small_const_part[inst.register] = None
+ small_const_whole[inst.register] = None
+ large_const_part[inst.register] = len(fixed)
+ fixed.append(self._make_temp_label())
+ fixed.append(inst)
+ elif inst.kind == InstructionKind.LARGE_CONST_2:
+ assert inst.register is not None
+ small_const_part[inst.register] = None
+ small_const_whole[inst.register] = None
+ index = large_const_part.get(inst.register)
+ large_const_part[inst.register] = None
+ if index is None:
+ fixed.append(inst)
+ continue
+ self._fixup_large_constant_pair(fixed, index, inst)
+ else:
+ for reg in small_const_part:
+ if self.may_use_reg(inst, reg):
+ small_const_part[reg] = None
+ for reg in small_const_whole:
+ if self.may_use_reg(inst, reg):
+ small_const_whole[reg] = None
+ for reg in small_const_part:
+ if self.may_use_reg(inst, reg):
+ large_const_part[reg] = None
+ fixed.append(inst)
+ block.instructions = fixed
class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods
_PATCH_FUNCS = {
# aarch64-apple-darwin:
"ARM64_RELOC_BRANCH26": "patch_aarch64_26r",
- "ARM64_RELOC_GOT_LOAD_PAGE21": "patch_aarch64_21r",
- "ARM64_RELOC_GOT_LOAD_PAGEOFF12": "patch_aarch64_12",
+ "ARM64_RELOC_GOT_LOAD_PAGE21": "patch_aarch64_21rx",
+ "ARM64_RELOC_GOT_LOAD_PAGEOFF12": "patch_aarch64_12x",
"ARM64_RELOC_PAGE21": "patch_aarch64_21r",
"ARM64_RELOC_PAGEOFF12": "patch_aarch64_12",
"ARM64_RELOC_UNSIGNED": "patch_64",
+ # custom aarch64, both darwin and linux:
"CUSTOM_AARCH64_BRANCH19": "patch_aarch64_19r",
"CUSTOM_AARCH64_CONST16a": "patch_aarch64_16a",
"CUSTOM_AARCH64_CONST16b": "patch_aarch64_16b",
# aarch64-pc-windows-msvc:
"IMAGE_REL_ARM64_BRANCH19": "patch_aarch64_19r",
"IMAGE_REL_ARM64_BRANCH26": "patch_aarch64_26r",
- "IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21r",
+ "IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21rx",
"IMAGE_REL_ARM64_PAGEOFFSET_12A": "patch_aarch64_12",
- "IMAGE_REL_ARM64_PAGEOFFSET_12L": "patch_aarch64_12",
+ "IMAGE_REL_ARM64_PAGEOFFSET_12L": "patch_aarch64_12x",
# i686-pc-windows-msvc:
"IMAGE_REL_I386_DIR32": "patch_32",
"IMAGE_REL_I386_REL32": "patch_x86_64_32rx",
# aarch64-unknown-linux-gnu:
"R_AARCH64_ABS64": "patch_64",
"R_AARCH64_ADD_ABS_LO12_NC": "patch_aarch64_12",
- "R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21r",
+ "R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21rx",
"R_AARCH64_ADR_PREL_PG_HI21": "patch_aarch64_21r",
"R_AARCH64_CALL26": "patch_aarch64_26r",
"R_AARCH64_CONDBR19": "patch_aarch64_19r",
"R_AARCH64_JUMP26": "patch_aarch64_26r",
- "R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12",
+ "R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12x",
"R_AARCH64_MOVW_UABS_G0_NC": "patch_aarch64_16a",
"R_AARCH64_MOVW_UABS_G1_NC": "patch_aarch64_16b",
"R_AARCH64_MOVW_UABS_G2_NC": "patch_aarch64_16c",
custom_location: str = ""
custom_value: str = ""
func: str = dataclasses.field(init=False)
+ offset2: int = -1
+ void: bool = False
# Convenience method:
replace = dataclasses.replace
def __post_init__(self) -> None:
self.func = _PATCH_FUNCS[self.kind]
+ def fold(self, other: typing.Self) -> None:
+ """Combine two holes into a single hole."""
+ # Only a patch_aarch64_12x hole (self) may absorb a patch_aarch64_21rx
+ # hole (other), and both must describe the same value, symbol and addend.
+ assert (
+ self.func == "patch_aarch64_12x" and other.func == "patch_aarch64_21rx"
+ ), (self.func, other.func)
+ assert self.value == other.value
+ assert self.symbol == other.symbol
+ assert self.addend == other.addend
+ # self becomes the combined hole and remembers other's offset as offset2.
+ self.func = "patch_aarch64_33rx"
+ self.offset2 = other.offset
+ # Mark the absorbed hole as void; as_c() returns "" for void holes.
+ other.void = True
+
def as_c(self, where: str) -> str:
"""Dump this hole as a call to a patch_* function."""
+ if self.void:
+ return ""
if self.custom_location:
location = self.custom_location
else:
value += f"{_signed(self.addend):#x}"
if self.need_state:
return f"{self.func}({location}, {value}, state);"
+ if self.offset2 >= 0:
+ first_location = f"{where} + {self.offset2:#x}"
+ return f"{self.func}({first_location}, {location}, {value});"
return f"{self.func}({location}, {value});"
_got_entries: set[int] = dataclasses.field(default_factory=set, init=False)
def convert_labels_to_relocations(self) -> None:
+ holes_by_offset: dict[int, Hole] = {}
+ first_in_pair: dict[str, Hole] = {}
+ for hole in self.code.holes:
+ holes_by_offset[hole.offset] = hole
for name, hole_plus in self.symbols.items():
if isinstance(name, str) and "_JIT_RELOCATION_" in name:
_, offset = hole_plus
int(offset), typing.cast(_schema.HoleKind, reloc), value, symbol, 0
)
self.code.holes.append(hole)
+ elif isinstance(name, str) and "_JIT_PAIR_" in name:
+ _, offset = hole_plus
+ reloc, target, index = name.split("_JIT_PAIR_")
+ if offset in holes_by_offset:
+ hole = holes_by_offset[offset]
+ if "33a" in reloc:
+ first_in_pair[index] = hole
+ elif "33b" in reloc and index in first_in_pair:
+ first = first_in_pair[index]
+ hole.fold(first)
def process_relocations(self, known_symbols: dict[str, int]) -> None:
"""Fix up all GOT and internal relocations for this stencil group."""