// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020 Oracle Corporation
*
* Module Author: Mike Christie
*/
#include "dm-path-selector.h"
#include <linux/device-mapper.h>
#include <linux/module.h>
/*
 * Message prefix identifying this selector. NOTE(review): presumably
 * consumed by the DM logging macros (DMWARN, DMWARN_LIMIT, DMERR) used in
 * this file; they are defined outside this file, so confirm there.
 */
#define DM_MSG_PREFIX "multipath io-affinity"
/*
 * Per-path selector context, stored in dm_path->pscontext by ioa_add_path().
 */
struct path_info {
/* The multipath path this context describes. */
struct dm_path *path;
/* CPUs requested for this path, parsed from the path's table argument. */
cpumask_var_t cpumask;
/*
 * References held on this context: ioa_add_path() takes one for every
 * selector path_map[] slot it points at this context, and ioa_free_path()
 * drops one per slot.
 */
refcount_t refcount;
/* Set by ioa_fail_path(), cleared by ioa_reinstate_path(). */
bool failed;
};
/*
 * Selector-wide state, stored in path_selector->context by ioa_create().
 */
struct selector {
/* Array of nr_cpu_ids entries, indexed by CPU number; NULL means unmapped. */
struct path_info **path_map;
/* CPUs for which ioa_add_path() installed a path_map[] entry. */
cpumask_var_t path_mask;
/*
 * Number of ioa_select_path() calls made on a CPU that had no path_map[]
 * entry; reported by ioa_status() for STATUSTYPE_INFO.
 */
atomic_t map_misses;
};
/*
 * Drop the reference that CPU @cpu holds on its mapped path context.
 *
 * When that was the last reference, the CPU is removed from the selector's
 * path_mask, the context and its cpumask are freed, and the path_map slot is
 * reset to NULL. When other references remain, only the count is lowered and
 * the slot and mask bit are left as they were.
 */
static void ioa_free_path(struct selector *s, unsigned int cpu)
{
	struct path_info *info = s->path_map[cpu];

	/* No path is mapped to this CPU. */
	if (info == NULL)
		return;

	/* Still referenced from other path_map slots: nothing more to do. */
	if (!refcount_dec_and_test(&info->refcount))
		return;

	cpumask_clear_cpu(cpu, s->path_mask);
	free_cpumask_var(info->cpumask);
	kfree(info);
	s->path_map[cpu] = NULL;
}
/*
 * Add @path to the selector and map it to the CPUs named in its argument.
 *
 * @ps:    path selector whose context is the struct selector to update.
 * @path:  path being added; on success its pscontext points at the new
 *         struct path_info, on failure it is reset to NULL.
 * @argc:  number of entries in @argv; must be exactly 1.
 * @argv:  argv[0] is the cpumask string handed to cpumask_parse().
 * @error: set to a static message describing the failure.
 *
 * CPUs that already have a mapping are skipped with a warning. The path is
 * accepted as long as at least one CPU ended up mapped to it.
 *
 * Returns 0 on success, -EINVAL for a wrong argument count, an unparsable
 * cpumask or a cpumask that produced no new mapping, and -ENOMEM when an
 * allocation fails.
 */
static int ioa_add_path(struct path_selector *ps, struct dm_path *path,
			int argc, char **argv, char **error)
{
	struct selector *s = ps->context;
	struct path_info *pi = NULL;
	unsigned int cpu;
	int ret;

	if (argc != 1) {
		*error = "io-affinity ps: invalid number of arguments";
		return -EINVAL;
	}

	pi = kzalloc(sizeof(*pi), GFP_KERNEL);
	if (!pi) {
		*error = "io-affinity ps: Error allocating path context";
		return -ENOMEM;
	}

	pi->path = path;
	path->pscontext = pi;
	/* Temporary reference owned by this function; dropped after the loop. */
	refcount_set(&pi->refcount, 1);

	if (!zalloc_cpumask_var(&pi->cpumask, GFP_KERNEL)) {
		*error = "io-affinity ps: Error allocating cpumask context";
		ret = -ENOMEM;
		goto free_pi;
	}

	ret = cpumask_parse(argv[0], pi->cpumask);
	if (ret) {
		*error = "io-affinity ps: invalid cpumask";
		ret = -EINVAL;
		goto free_mask;
	}

	for_each_cpu(cpu, pi->cpumask) {
		/*
		 * path_map[] only has nr_cpu_ids entries. NOTE(review): the
		 * break presumably relies on for_each_cpu() visiting CPUs in
		 * ascending order, so all later CPUs are out of range as well.
		 */
		if (cpu >= nr_cpu_ids) {
			DMWARN_LIMIT("Ignoring mapping for CPU %u. Max CPU is %u",
				     cpu, nr_cpu_ids);
			break;
		}

		/* First path to claim a CPU keeps it. */
		if (s->path_map[cpu]) {
			DMWARN("CPU mapping for %u exists. Ignoring.", cpu);
			continue;
		}

		/* One reference per path_map[] slot pointing at this context. */
		cpumask_set_cpu(cpu, s->path_mask);
		s->path_map[cpu] = pi;
		refcount_inc(&pi->refcount);
	}

	/*
	 * Drop the temporary reference. Reaching zero means no CPU was mapped
	 * to this path, so nothing references pi and it can be freed below.
	 */
	if (refcount_dec_and_test(&pi->refcount)) {
		*error = "io-affinity ps: No new/valid CPU mapping found";
		ret = -EINVAL;
		goto free_mask;
	}

	return 0;

free_mask:
	free_cpumask_var(pi->cpumask);
free_pi:
	/* Do not leave the caller's path pointing at the freed context. */
	path->pscontext = NULL;
	kfree(pi);
	return ret;
}
/*
 * Allocate and initialise the selector state for @ps.
 *
 * @ps:   path selector; on success its context points at the new selector.
 * @argc: unused.
 * @argv: unused.
 *
 * The state consists of a zeroed path_map[] with one slot per possible CPU
 * id (nr_cpu_ids), an empty path_mask and a zeroed miss counter.
 *
 * Returns 0 on success or -ENOMEM when any allocation fails; partially
 * allocated state is released before returning.
 */
static int ioa_create(struct path_selector *ps, unsigned argc, char **argv)
{
	struct selector *s;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	/* kcalloc() zeroes the array and checks count * size for overflow. */
	s->path_map = kcalloc(nr_cpu_ids, sizeof(*s->path_map), GFP_KERNEL);
	if (!s->path_map)
		goto free_selector;

	if (!zalloc_cpumask_var(&s->path_mask, GFP_KERNEL))
		goto free_map;

	atomic_set(&s->map_misses, 0);
	ps->context = s;
	return 0;

free_map:
	kfree(s->path_map);
free_selector:
	kfree(s);
	return -ENOMEM;
}
static void ioa_destroy(struct path_selector *ps)
{
struct selector *s = ps->context;
unsigned cpu;
for_each_cpu(cpu, s->path_mask)
ioa_free_path(s, cpu);
free_cpumask_var(s->path_mask);
kfree(s->path_map);
kfree(s);
ps->context = NULL;
}
/*
 * Emit status text for the selector or for one of its paths into @result.
 *
 * With no @path, "0 " is emitted. For a path, STATUSTYPE_INFO emits the
 * selector's map-miss counter and STATUSTYPE_TABLE emits the path's cpumask
 * in "%*pb" form; any other @type emits nothing.
 *
 * NOTE(review): DMEMIT is defined outside this file; it appears to use the
 * locals sz, result and maxlen by name, so those names must stay as they are.
 *
 * Returns sz, which starts at 0 and is not modified here other than through
 * DMEMIT.
 */
static int ioa_status(struct path_selector *ps, struct dm_path *path,
		      status_type_t type, char *result, unsigned int maxlen)
{
	struct selector *sel = ps->context;
	int sz = 0;

	if (path == NULL) {
		DMEMIT("0 ");
	} else if (type == STATUSTYPE_INFO) {
		DMEMIT("%d ", atomic_read(&sel->map_misses));
	} else if (type == STATUSTYPE_TABLE) {
		struct path_info *info = path->pscontext;

		DMEMIT("%*pb ", cpumask_pr_args(info->cpumask));
	}

	return sz;
}
/*
 * Flag path @p as failed so ioa_select_path() stops returning it.
 * @ps is unused.
 */
static void ioa_fail_path(struct path_selector *ps, struct dm_path *p)
{
	struct path_info *info;

	info = p->pscontext;
	info->failed = true;
}
/*
 * Clear the failed flag of path @p so ioa_select_path() may return it again.
 * @ps is unused. Always returns 0.
 */
static int ioa_reinstate_path(struct path_selector *ps, struct dm_path *p)
{
	struct path_info *info;

	info = p->pscontext;
	info->failed = false;

	return 0;
}
static struct dm_path *ioa_select_path(struct path_selector *ps,
size_t nr_bytes)
{
unsigned int cpu, node;
struct selector *s = ps->context;
const struct cpumask *cpumask;
struct path_info *pi;
int i;
cpu = get_cpu();
pi = s->path_map[cpu];
if (pi && !pi->failed)
goto done;
/*
* Perf is not optimal, but we at least try the local node then just
* try not to fail.
*/
if (!pi)
atomic_inc(&s->map_misses);
node = cpu_to_node(cpu);
cpumask = cpumask_of_node(node);
for_each_cpu(i, cpumask) {
pi = s->path_map[i];
if (pi && !pi->failed)
goto done;
}
for_each_cpu(i, s->path_mask) {
pi = s->path_map[i];
if (pi && !pi->failed)
goto done;
}
pi = NULL;
done:
put_cpu();
return pi ? pi->path : NULL;
}
/*
 * Path selector type handed to dm_register_path_selector() and
 * dm_unregister_path_selector() by the module init/exit functions.
 */
static struct path_selector_type ioa_ps = {
.name = "io-affinity",
.module = THIS_MODULE,
/*
 * NOTE(review): presumably the number of per-path values for each status
 * type; this matches the single cpumask ioa_add_path() requires and the
 * single value ioa_status() emits per type. Confirm against
 * dm-path-selector.h.
 */
.table_args = 1,
.info_args = 1,
.create = ioa_create,
.destroy = ioa_destroy,
.status = ioa_status,
.add_path = ioa_add_path,
.fail_path = ioa_fail_path,
.reinstate_path = ioa_reinstate_path,
.select_path = ioa_select_path,
};
/*
 * Module entry point: register the io-affinity path selector type.
 *
 * Returns the value of dm_register_path_selector(); a negative value is
 * logged before being returned.
 */
static int __init dm_ioa_init(void)
{
	int err;

	err = dm_register_path_selector(&ioa_ps);
	if (err >= 0)
		return err;

	DMERR("register failed %d", err);
	return err;
}
/*
 * Module exit point: unregister the io-affinity path selector type.
 * A negative result from the unregister call is logged; nothing else is done
 * with it.
 */
static void __exit dm_ioa_exit(void)
{
	int err;

	err = dm_unregister_path_selector(&ioa_ps);
	if (err >= 0)
		return;

	DMERR("unregister failed %d", err);
}
/* Hook up the module entry/exit functions and declare the module metadata. */
module_init(dm_ioa_init);
module_exit(dm_ioa_exit);
MODULE_DESCRIPTION(DM_NAME " multipath path selector that selects paths based on the CPU IO is being executed on");
MODULE_AUTHOR("Mike Christie <michael.christie@oracle.com>");
MODULE_LICENSE("GPL");