From e639055f1f30311db91cafb36e408cc727c7d445 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Thu, 15 Jan 2026 20:58:36 -0800 Subject: [PATCH] cxl/region: Translate DPA->HPA in unaligned MOD3 regions The CXL driver implementation of DPA->HPA address translation depends on a region's starting address always being aligned to Host Bridge Interleave Ways * 256MB. The driver follows the decode methods defined in the CXL Spec[1] and expanded upon in the CXL Driver Writers Guide[2], which describe bit manipulations based on power-of-2 alignment to translate a DPA to an HPA. With the introduction of MOD3 interleave way support, platforms may create regions at starting addresses that are not power-of-2 aligned. This allows platforms to avoid gaps in the memory map, but addresses within those regions cannot be translated using the existing bit manipulation method. Introduce an unaligned translation method for DPA->HPA that reconstructs an HPA by restoring the address first at the port level and then at the host bridge level. [1] CXL Spec 4.0 8.2.4.20.13 Implementation Note Device Decoder Logic [2] CXL Type 3 Memory Software Guide 1.1 2.13.25 DPA to HPA Translation Suggested-by: Qing Huang Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: Alison Schofield Link: https://patch.msgid.link/e7c53215bf69f2ff1ae7e58bcc49ca387b7b0299.1768538962.git.alison.schofield@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 160 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 155 insertions(+), 5 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index ae899f68551f7..cdfa454b940db 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3112,13 +3112,146 @@ u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig) } EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_hpa_offset, "cxl_translate"); +static int decode_pos(int region_ways, int hb_ways, int pos, int *pos_port, + int *pos_hb) +{ + int devices_per_hb; + + /* + * Decode for 3-6-12 way interleaves as defined in the CXL + * Spec 4.0 9.13.1.1 Legal Interleaving Configurations. + * Region creation should prevent invalid combinations but + * sanity check here to avoid a silent bad decode. + */ + switch (hb_ways) { + case 3: + if (region_ways != 3 && region_ways != 6 && region_ways != 12) + return -EINVAL; + break; + case 6: + if (region_ways != 6 && region_ways != 12) + return -EINVAL; + break; + case 12: + if (region_ways != 12) + return -EINVAL; + break; + default: + return -EINVAL; + } + /* + * Each host bridge contributes an equal number of endpoints + * that are laid out contiguously per host bridge. Modulo + * selects the port within a host bridge and division selects + * the host bridge position. + */ + devices_per_hb = region_ways / hb_ways; + *pos_port = pos % devices_per_hb; + *pos_hb = pos / devices_per_hb; + + return 0; +} + +/* + * restore_parent() reconstruct the address in parent + * + * This math, specifically the bitmask creation 'mask = gran - 1' relies + * on the CXL Spec requirement that interleave granularity is always a + * power of two. + * + * [mask] isolate the offset with the granularity + * [addr & ~mask] remove the offset leaving the aligned portion + * [* ways] distribute across all interleave ways + * [+ (pos * gran)] add the positional offset + * [+ (addr & mask)] restore the masked offset + */ +static u64 restore_parent(u64 addr, u64 pos, u64 gran, u64 ways) +{ + u64 mask = gran - 1; + + return ((addr & ~mask) * ways) + (pos * gran) + (addr & mask); +} + +/* + * unaligned_dpa_to_hpa() translates a DPA to HPA when the region resource + * start address is not aligned at Host Bridge Interleave Ways * 256MB. + * + * Unaligned start addresses only occur with MOD3 interleaves. All power- + * of-two interleaves are guaranteed aligned. + */ +static u64 unaligned_dpa_to_hpa(struct cxl_decoder *cxld, + struct cxl_region_params *p, int pos, u64 dpa) +{ + int ways_port = p->interleave_ways / cxld->interleave_ways; + int gran_port = p->interleave_granularity; + int gran_hb = cxld->interleave_granularity; + int ways_hb = cxld->interleave_ways; + int pos_port, pos_hb, gran_shift; + u64 hpa_port = 0; + + /* Decode an endpoint 'pos' into port and host-bridge components */ + if (decode_pos(p->interleave_ways, ways_hb, pos, &pos_port, &pos_hb)) { + dev_dbg(&cxld->dev, "not supported for region ways:%d\n", + p->interleave_ways); + return ULLONG_MAX; + } + + /* Restore the port parent address if needed */ + if (gran_hb != gran_port) + hpa_port = restore_parent(dpa, pos_port, gran_port, ways_port); + else + hpa_port = dpa; + + /* + * Complete the HPA reconstruction by restoring the address as if + * each HB position is a candidate. Test against expected pos_hb + * to confirm match. + */ + gran_shift = ilog2(gran_hb); + for (int position = 0; position < ways_hb; position++) { + u64 shifted, hpa; + + hpa = restore_parent(hpa_port, position, gran_hb, ways_hb); + hpa += p->res->start; + + shifted = hpa >> gran_shift; + if (do_div(shifted, ways_hb) == pos_hb) + return hpa; + } + + dev_dbg(&cxld->dev, "fail dpa:%#llx region:%pr pos:%d\n", dpa, p->res, + pos); + dev_dbg(&cxld->dev, " port-w/g/p:%d/%d/%d hb-w/g/p:%d/%d/%d\n", + ways_port, gran_port, pos_port, ways_hb, gran_hb, pos_hb); + + return ULLONG_MAX; +} + +static bool region_is_unaligned_mod3(struct cxl_region *cxlr) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct cxl_region_params *p = &cxlr->params; + int hbiw = cxld->interleave_ways; + u64 rem; + + if (is_power_of_2(hbiw)) + return false; + + div64_u64_rem(p->res->start, (u64)hbiw * SZ_256M, &rem); + + return (rem != 0); +} + u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa) { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; struct cxl_region_params *p = &cxlr->params; struct cxl_endpoint_decoder *cxled = NULL; u64 dpa_offset, hpa_offset, hpa; + bool unaligned = false; u16 eig = 0; u8 eiw = 0; int pos; @@ -3132,15 +3265,32 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, if (!cxled) return ULLONG_MAX; + dpa_offset = dpa - cxl_dpa_resource_start(cxled); + + /* Unaligned calc for MOD3 interleaves not hbiw * 256MB aligned */ + unaligned = region_is_unaligned_mod3(cxlr); + if (unaligned) { + hpa = unaligned_dpa_to_hpa(cxld, p, cxled->pos, dpa_offset); + if (hpa == ULLONG_MAX) + return ULLONG_MAX; + + goto skip_aligned; + } + /* + * Aligned calc for all power-of-2 interleaves and for MOD3 + * interleaves that are aligned at hbiw * 256MB + */ pos = cxled->pos; ways_to_eiw(p->interleave_ways, &eiw); granularity_to_eig(p->interleave_granularity, &eig); - dpa_offset = dpa - cxl_dpa_resource_start(cxled); hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, eiw, eig); /* Apply the hpa_offset to the region base address */ - hpa = hpa_offset + p->res->start + p->cache_size; + hpa = hpa_offset + p->res->start; + +skip_aligned: + hpa += p->cache_size; /* Root decoder translation overrides typical modulo decode */ if (cxlrd->ops.hpa_to_spa) @@ -3151,9 +3301,9 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, "Addr trans fail: hpa 0x%llx not in region\n", hpa); return ULLONG_MAX; } - - /* Simple chunk check, by pos & gran, only applies to modulo decodes */ - if (!cxlrd->ops.hpa_to_spa && !cxl_is_hpa_in_chunk(hpa, cxlr, pos)) + /* Chunk check applies to aligned modulo decodes only */ + if (!unaligned && !cxlrd->ops.hpa_to_spa && + !cxl_is_hpa_in_chunk(hpa, cxlr, pos)) return ULLONG_MAX; return hpa; -- 2.47.3