Skip to content

Commit

Permalink
Merge tag 'cxl-fixes-6.8-rc6' of git://git.kernel.org/pub/scm/linux/k…
Browse files Browse the repository at this point in the history
…ernel/git/cxl/cxl

Pull cxl fixes from Dan Williams:
 "A collection of significant fixes for the CXL subsystem.

  The largest change in this set, that bordered on "new development", is
  the fix for the fact that the location of the new qos_class attribute
  did not match the Documentation. The fix ends up deleting more code
  than it added, and it has a new unit test to backstop basic errors in
  this interface going forward. So the "red-diff" and unit test saved
  the "rip it out and try again" response.

  In contrast, the new notification path for firmware reported CXL
  errors (CXL CPER notifications) has a locking context bug that can not
  be fixed with a red-diff. Given where the release cycle stands, it is
  not comfortable to squeeze in that fix in these waning days. So, that
  receives the "back it out and try again later" treatment.

  There is a regression fix in the code that establishes memory NUMA
  nodes for platform CXL regions. That has an ack from x86 folks. There
  are a couple more fixups for Linux to understand (reassemble) CXL
  regions instantiated by platform firmware. The policy around platforms
  that do not match host-physical-address with system-physical-address
  (i.e. systems that have an address translation mechanism between the
  address range reported in the ACPI CEDT.CFMWS and endpoint decoders)
  has been softened to abort driver load rather than teardown the memory
  range (can cause system hangs). Lastly, there is a robustness /
  regression fix for cases where the driver would previously continue in
  the face of error, and a fixup for PCI error notification handling.

  Summary:

   - Fix NUMA initialization from ACPI CEDT.CFMWS

   - Fix region assembly failures due to async init order

   - Fix / simplify export of qos_class information

   - Fix cxl_acpi initialization vs single-window-init failures

   - Fix handling of repeated 'pci_channel_io_frozen' notifications

   - Workaround platforms that violate host-physical-address ==
     system-physical address assumptions

   - Defer CXL CPER notification handling to v6.9"

* tag 'cxl-fixes-6.8-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl:
  cxl/acpi: Fix load failures due to single window creation failure
  acpi/ghes: Remove CXL CPER notifications
  cxl/pci: Fix disabling memory if DVSEC CXL Range does not match a CFMWS window
  cxl/test: Add support for qos_class checking
  cxl: Fix sysfs export of qos_class for memdev
  cxl: Remove unnecessary type cast in cxl_qos_class_verify()
  cxl: Change 'struct cxl_memdev_state' *_perf_list to single 'struct cxl_dpa_perf'
  cxl/region: Allow out of order assembly of autodiscovered regions
  cxl/region: Handle endpoint decoders in cxl_region_find_decoder()
  x86/numa: Fix the sort compare func used in numa_fill_memblks()
  x86/numa: Fix the address overlap check in numa_fill_memblks()
  cxl/pci: Skip to handle RAS errors if CXL.mem device is detached
  • Loading branch information
torvalds committed Feb 24, 2024
2 parents f2e367d + 5c6224b commit ac389bc
Show file tree
Hide file tree
Showing 19 changed files with 289 additions and 334 deletions.
21 changes: 8 additions & 13 deletions arch/x86/mm/numa.c
Original file line number Diff line number Diff line change
Expand Up @@ -934,7 +934,7 @@ static int __init cmp_memblk(const void *a, const void *b)
const struct numa_memblk *ma = *(const struct numa_memblk **)a;
const struct numa_memblk *mb = *(const struct numa_memblk **)b;

return ma->start - mb->start;
return (ma->start > mb->start) - (ma->start < mb->start);
}

static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
Expand All @@ -944,14 +944,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
* @start: address to begin fill
* @end: address to end fill
*
* Find and extend numa_meminfo memblks to cover the @start-@end
* physical address range, such that the first memblk includes
* @start, the last memblk includes @end, and any gaps in between
* are filled.
* Find and extend numa_meminfo memblks to cover the physical
* address range @start-@end
*
* RETURNS:
* 0 : Success
* NUMA_NO_MEMBLK : No memblk exists in @start-@end range
* NUMA_NO_MEMBLK : No memblks exist in address range @start-@end
*/

int __init numa_fill_memblks(u64 start, u64 end)
Expand All @@ -963,17 +961,14 @@ int __init numa_fill_memblks(u64 start, u64 end)

/*
* Create a list of pointers to numa_meminfo memblks that
* overlap start, end. Exclude (start == bi->end) since
* end addresses in both a CFMWS range and a memblk range
* are exclusive.
*
* This list of pointers is used to make in-place changes
* that fill out the numa_meminfo memblks.
* overlap start, end. The list is used to make in-place
* changes that fill out the numa_meminfo memblks.
*/
for (int i = 0; i < mi->nr_blks; i++) {
struct numa_memblk *bi = &mi->blk[i];

if (start < bi->end && end >= bi->start) {
if (memblock_addrs_overlap(start, end - start, bi->start,
bi->end - bi->start)) {
blk[count] = &mi->blk[i];
count++;
}
Expand Down
63 changes: 0 additions & 63 deletions drivers/acpi/apei/ghes.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/cxl-event.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
Expand Down Expand Up @@ -674,52 +673,6 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
schedule_work(&entry->work);
}

/*
* Only a single callback can be registered for CXL CPER events.
*/
static DECLARE_RWSEM(cxl_cper_rw_sem);
static cxl_cper_callback cper_callback;

static void cxl_cper_post_event(enum cxl_event_type event_type,
struct cxl_cper_event_rec *rec)
{
if (rec->hdr.length <= sizeof(rec->hdr) ||
rec->hdr.length > sizeof(*rec)) {
pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
rec->hdr.length);
return;
}

if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
pr_err(FW_WARN "CXL CPER invalid event\n");
return;
}

guard(rwsem_read)(&cxl_cper_rw_sem);
if (cper_callback)
cper_callback(event_type, rec);
}

int cxl_cper_register_callback(cxl_cper_callback callback)
{
guard(rwsem_write)(&cxl_cper_rw_sem);
if (cper_callback)
return -EINVAL;
cper_callback = callback;
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL);

int cxl_cper_unregister_callback(cxl_cper_callback callback)
{
guard(rwsem_write)(&cxl_cper_rw_sem);
if (callback != cper_callback)
return -EINVAL;
cper_callback = NULL;
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL);

static bool ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
Expand Down Expand Up @@ -754,22 +707,6 @@ static bool ghes_do_proc(struct ghes *ghes,
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
queued = ghes_handle_arm_hw_error(gdata, sev, sync);
} else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
struct cxl_cper_event_rec *rec =
acpi_hest_get_payload(gdata);

cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
} else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
struct cxl_cper_event_rec *rec =
acpi_hest_get_payload(gdata);

cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
} else if (guid_equal(sec_type,
&CPER_SEC_CXL_MEM_MODULE_GUID)) {
struct cxl_cper_event_rec *rec =
acpi_hest_get_payload(gdata);

cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
} else {
void *err = acpi_hest_get_payload(gdata);

Expand Down
46 changes: 28 additions & 18 deletions drivers/cxl/acpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,31 +316,27 @@ static const struct cxl_root_ops acpi_root_ops = {
.qos_class = cxl_acpi_qos_class,
};

static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
struct cxl_cfmws_context *ctx)
{
int target_map[CXL_DECODER_MAX_INTERLEAVE];
struct cxl_cfmws_context *ctx = arg;
struct cxl_port *root_port = ctx->root_port;
struct resource *cxl_res = ctx->cxl_res;
struct cxl_cxims_context cxims_ctx;
struct cxl_root_decoder *cxlrd;
struct device *dev = ctx->dev;
struct acpi_cedt_cfmws *cfmws;
cxl_calc_hb_fn cxl_calc_hb;
struct cxl_decoder *cxld;
unsigned int ways, i, ig;
struct resource *res;
int rc;

cfmws = (struct acpi_cedt_cfmws *) header;

rc = cxl_acpi_cfmws_verify(dev, cfmws);
if (rc) {
dev_err(dev, "CFMWS range %#llx-%#llx not registered\n",
cfmws->base_hpa,
cfmws->base_hpa + cfmws->window_size - 1);
return 0;
return rc;
}

rc = eiw_to_ways(cfmws->interleave_ways, &ways);
Expand Down Expand Up @@ -376,7 +372,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,

cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
if (IS_ERR(cxlrd))
return 0;
return PTR_ERR(cxlrd);

cxld = &cxlrd->cxlsd.cxld;
cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
Expand Down Expand Up @@ -420,16 +416,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
put_device(&cxld->dev);
else
rc = cxl_decoder_autoremove(dev, cxld);
if (rc) {
dev_err(dev, "Failed to add decode range: %pr", res);
return rc;
}
dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n",
dev_name(&cxld->dev),
phys_to_target_node(cxld->hpa_range.start),
cxld->hpa_range.start, cxld->hpa_range.end);

return 0;
return rc;

err_insert:
kfree(res->name);
Expand All @@ -438,6 +425,29 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
return -ENOMEM;
}

static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
{
struct acpi_cedt_cfmws *cfmws = (struct acpi_cedt_cfmws *)header;
struct cxl_cfmws_context *ctx = arg;
struct device *dev = ctx->dev;
int rc;

rc = __cxl_parse_cfmws(cfmws, ctx);
if (rc)
dev_err(dev,
"Failed to add decode range: [%#llx - %#llx] (%d)\n",
cfmws->base_hpa,
cfmws->base_hpa + cfmws->window_size - 1, rc);
else
dev_dbg(dev, "decode range: node: %d range [%#llx - %#llx]\n",
phys_to_target_node(cfmws->base_hpa), cfmws->base_hpa,
cfmws->base_hpa + cfmws->window_size - 1);

/* never fail cxl_acpi load for a single window failure */
return 0;
}

__mock struct acpi_device *to_cxl_host_bridge(struct device *host,
struct device *dev)
{
Expand Down
86 changes: 26 additions & 60 deletions drivers/cxl/core/cdat.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,40 +210,19 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
return 0;
}

static void add_perf_entry(struct device *dev, struct dsmas_entry *dent,
struct list_head *list)
static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
struct cxl_dpa_perf *dpa_perf)
{
struct cxl_dpa_perf *dpa_perf;

dpa_perf = kzalloc(sizeof(*dpa_perf), GFP_KERNEL);
if (!dpa_perf)
return;

dpa_perf->dpa_range = dent->dpa_range;
dpa_perf->coord = dent->coord;
dpa_perf->qos_class = dent->qos_class;
list_add_tail(&dpa_perf->list, list);
dev_dbg(dev,
"DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
dent->dpa_range.start, dpa_perf->qos_class,
dent->coord.read_bandwidth, dent->coord.write_bandwidth,
dent->coord.read_latency, dent->coord.write_latency);
}

static void free_perf_ents(void *data)
{
struct cxl_memdev_state *mds = data;
struct cxl_dpa_perf *dpa_perf, *n;
LIST_HEAD(discard);

list_splice_tail_init(&mds->ram_perf_list, &discard);
list_splice_tail_init(&mds->pmem_perf_list, &discard);
list_for_each_entry_safe(dpa_perf, n, &discard, list) {
list_del(&dpa_perf->list);
kfree(dpa_perf);
}
}

static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
struct xarray *dsmas_xa)
{
Expand All @@ -263,16 +242,14 @@ static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
xa_for_each(dsmas_xa, index, dent) {
if (resource_size(&cxlds->ram_res) &&
range_contains(&ram_range, &dent->dpa_range))
add_perf_entry(dev, dent, &mds->ram_perf_list);
update_perf_entry(dev, dent, &mds->ram_perf);
else if (resource_size(&cxlds->pmem_res) &&
range_contains(&pmem_range, &dent->dpa_range))
add_perf_entry(dev, dent, &mds->pmem_perf_list);
update_perf_entry(dev, dent, &mds->pmem_perf);
else
dev_dbg(dev, "no partition for dsmas dpa: %#llx\n",
dent->dpa_range.start);
}

devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds);
}

static int match_cxlrd_qos_class(struct device *dev, void *data)
Expand All @@ -293,24 +270,24 @@ static int match_cxlrd_qos_class(struct device *dev, void *data)
return 0;
}

static void cxl_qos_match(struct cxl_port *root_port,
struct list_head *work_list,
struct list_head *discard_list)
static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf)
{
struct cxl_dpa_perf *dpa_perf, *n;
*dpa_perf = (struct cxl_dpa_perf) {
.qos_class = CXL_QOS_CLASS_INVALID,
};
}

list_for_each_entry_safe(dpa_perf, n, work_list, list) {
int rc;
static bool cxl_qos_match(struct cxl_port *root_port,
struct cxl_dpa_perf *dpa_perf)
{
if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
return false;

if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
return;
if (!device_for_each_child(&root_port->dev, &dpa_perf->qos_class,
match_cxlrd_qos_class))
return false;

rc = device_for_each_child(&root_port->dev,
(void *)&dpa_perf->qos_class,
match_cxlrd_qos_class);
if (!rc)
list_move_tail(&dpa_perf->list, discard_list);
}
return true;
}

static int match_cxlrd_hb(struct device *dev, void *data)
Expand All @@ -334,23 +311,10 @@ static int match_cxlrd_hb(struct device *dev, void *data)
return 0;
}

static void discard_dpa_perf(struct list_head *list)
{
struct cxl_dpa_perf *dpa_perf, *n;

list_for_each_entry_safe(dpa_perf, n, list, list) {
list_del(&dpa_perf->list);
kfree(dpa_perf);
}
}
DEFINE_FREE(dpa_perf, struct list_head *, if (!list_empty(_T)) discard_dpa_perf(_T))

static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
LIST_HEAD(__discard);
struct list_head *discard __free(dpa_perf) = &__discard;
struct cxl_port *root_port;
int rc;

Expand All @@ -363,16 +327,17 @@ static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
root_port = &cxl_root->port;

/* Check that the QTG IDs are all sane between end device and root decoders */
cxl_qos_match(root_port, &mds->ram_perf_list, discard);
cxl_qos_match(root_port, &mds->pmem_perf_list, discard);
if (!cxl_qos_match(root_port, &mds->ram_perf))
reset_dpa_perf(&mds->ram_perf);
if (!cxl_qos_match(root_port, &mds->pmem_perf))
reset_dpa_perf(&mds->pmem_perf);

/* Check to make sure that the device's host bridge is under a root decoder */
rc = device_for_each_child(&root_port->dev,
(void *)cxlmd->endpoint->host_bridge,
match_cxlrd_hb);
cxlmd->endpoint->host_bridge, match_cxlrd_hb);
if (!rc) {
list_splice_tail_init(&mds->ram_perf_list, discard);
list_splice_tail_init(&mds->pmem_perf_list, discard);
reset_dpa_perf(&mds->ram_perf);
reset_dpa_perf(&mds->pmem_perf);
}

return rc;
Expand Down Expand Up @@ -417,6 +382,7 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port)

cxl_memdev_set_qos_class(cxlds, dsmas_xa);
cxl_qos_class_verify(cxlmd);
cxl_memdev_update_perf(cxlmd);
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL);

Expand Down
4 changes: 2 additions & 2 deletions drivers/cxl/core/mbox.c
Original file line number Diff line number Diff line change
Expand Up @@ -1391,8 +1391,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
mds->cxlds.reg_map.host = dev;
mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
INIT_LIST_HEAD(&mds->ram_perf_list);
INIT_LIST_HEAD(&mds->pmem_perf_list);
mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID;

return mds;
}
Expand Down
Loading

0 comments on commit ac389bc

Please sign in to comment.