// SPDX-License-Identifier: GPL-2.0 #include #include #include #include #include #include struct memory_tier { /* hierarchy of memory tiers */ struct list_head list; /* list of all memory types part of this tier */ struct list_head memory_types; /* * start value of abstract distance. memory tier maps * an abstract distance range, * adistance_start .. adistance_start + MEMTIER_CHUNK_SIZE */ int adistance_start; }; struct node_memory_type_map { struct memory_dev_type *memtype; int map_count; }; static DEFINE_MUTEX(memory_tier_lock); static LIST_HEAD(memory_tiers); static struct node_memory_type_map node_memory_types[MAX_NUMNODES]; static struct memory_dev_type *default_dram_type; static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype) { bool found_slot = false; struct memory_tier *memtier, *new_memtier; int adistance = memtype->adistance; unsigned int memtier_adistance_chunk_size = MEMTIER_CHUNK_SIZE; lockdep_assert_held_once(&memory_tier_lock); /* * If the memtype is already part of a memory tier, * just return that. */ if (memtype->memtier) return memtype->memtier; adistance = round_down(adistance, memtier_adistance_chunk_size); list_for_each_entry(memtier, &memory_tiers, list) { if (adistance == memtier->adistance_start) { memtype->memtier = memtier; list_add(&memtype->tier_sibiling, &memtier->memory_types); return memtier; } else if (adistance < memtier->adistance_start) { found_slot = true; break; } } new_memtier = kmalloc(sizeof(struct memory_tier), GFP_KERNEL); if (!new_memtier) return ERR_PTR(-ENOMEM); new_memtier->adistance_start = adistance; INIT_LIST_HEAD(&new_memtier->list); INIT_LIST_HEAD(&new_memtier->memory_types); if (found_slot) list_add_tail(&new_memtier->list, &memtier->list); else list_add_tail(&new_memtier->list, &memory_tiers); memtype->memtier = new_memtier; list_add(&memtype->tier_sibiling, &new_memtier->memory_types); return new_memtier; } static inline void __init_node_memory_type(int node, struct memory_dev_type *memtype) { if (!node_memory_types[node].memtype) node_memory_types[node].memtype = memtype; /* * for each device getting added in the same NUMA node * with this specific memtype, bump the map count. We * Only take memtype device reference once, so that * changing a node memtype can be done by droping the * only reference count taken here. */ if (node_memory_types[node].memtype == memtype) { if (!node_memory_types[node].map_count++) kref_get(&memtype->kref); } } static struct memory_tier *set_node_memory_tier(int node) { struct memory_tier *memtier; struct memory_dev_type *memtype; lockdep_assert_held_once(&memory_tier_lock); if (!node_state(node, N_MEMORY)) return ERR_PTR(-EINVAL); __init_node_memory_type(node, default_dram_type); memtype = node_memory_types[node].memtype; node_set(node, memtype->nodes); memtier = find_create_memory_tier(memtype); return memtier; } static struct memory_tier *__node_get_memory_tier(int node) { struct memory_dev_type *memtype; memtype = node_memory_types[node]; if (memtype && node_isset(node, memtype->nodes)) return memtype->memtier; return NULL; } static void destroy_memory_tier(struct memory_tier *memtier) { list_del(&memtier->list); kfree(memtier); } static bool clear_node_memory_tier(int node) { bool cleared = false; struct memory_tier *memtier; memtier = __node_get_memory_tier(node); if (memtier) { struct memory_dev_type *memtype; memtype = node_memory_types[node].memtype; node_clear(node, memtype->nodes); if (nodes_empty(memtype->nodes)) { list_del_init(&memtype->tier_sibiling); memtype->memtier = NULL; if (list_empty(&memtier->memory_types)) destroy_memory_tier(memtier); } cleared = true; } return cleared; } static void release_memtype(struct kref *kref) { struct memory_dev_type *memtype; memtype = container_of(kref, struct memory_dev_type, kref); kfree(memtype); } struct memory_dev_type *alloc_memory_type(int adistance) { struct memory_dev_type *memtype; memtype = kmalloc(sizeof(*memtype), GFP_KERNEL); if (!memtype) return ERR_PTR(-ENOMEM); memtype->adistance = adistance; INIT_LIST_HEAD(&memtype->tier_sibiling); memtype->nodes = NODE_MASK_NONE; memtype->memtier = NULL; kref_init(&memtype->kref); return memtype; } EXPORT_SYMBOL_GPL(alloc_memory_type); void destroy_memory_type(struct memory_dev_type *memtype) { kref_put(&memtype->kref, release_memtype); } EXPORT_SYMBOL_GPL(destroy_memory_type); void init_node_memory_type(int node, struct memory_dev_type *memtype) { mutex_lock(&memory_tier_lock); __init_node_memory_type(node, memtype); mutex_unlock(&memory_tier_lock); } EXPORT_SYMBOL_GPL(init_node_memory_type); void clear_node_memory_type(int node, struct memory_dev_type *memtype) { mutex_lock(&memory_tier_lock); if (node_memory_types[node].memtype == memtype) node_memory_types[node].map_count--; /* * If we umapped all the attached devices to this node, * clear the node memory type. */ if (!node_memory_types[node].map_count) { node_memory_types[node].memtype = NULL; kref_put(&memtype->kref, release_memtype); } mutex_unlock(&memory_tier_lock); } EXPORT_SYMBOL_GPL(clear_node_memory_type); static int __meminit memtier_hotplug_callback(struct notifier_block *self, unsigned long action, void *_arg) { struct memory_notify *arg = _arg; /* * Only update the node migration order when a node is * changing status, like online->offline. */ if (arg->status_change_nid < 0) return notifier_from_errno(0); switch (action) { case MEM_OFFLINE: mutex_lock(&memory_tier_lock); clear_node_memory_tier(arg->status_change_nid); mutex_unlock(&memory_tier_lock); break; case MEM_ONLINE: mutex_lock(&memory_tier_lock); set_node_memory_tier(arg->status_change_nid); mutex_unlock(&memory_tier_lock); break; } return notifier_from_errno(0); } static int __init memory_tier_init(void) { int node; struct memory_tier *memtier; mutex_lock(&memory_tier_lock); /* * For now we can have 4 faster memory tiers with smaller adistance * than default DRAM tier. */ default_dram_type = alloc_memory_type(MEMTIER_ADISTANCE_DRAM); if (!default_dram_type) panic("%s() failed to allocate default DRAM tier\n", __func__); /* * Look at all the existing N_MEMORY nodes and add them to * default memory tier or to a tier if we already have memory * types assigned. */ for_each_node_state(node, N_MEMORY) { memtier = set_node_memory_tier(node); if (IS_ERR(memtier)) /* * Continue with memtiers we are able to setup */ break; } mutex_unlock(&memory_tier_lock); hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRIO); return 0; } subsys_initcall(memory_tier_init); bool numa_demotion_enabled = false; #ifdef CONFIG_MIGRATION #ifdef CONFIG_SYSFS static ssize_t numa_demotion_enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", numa_demotion_enabled ? "true" : "false"); } static ssize_t numa_demotion_enabled_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { ssize_t ret; ret = kstrtobool(buf, &numa_demotion_enabled); if (ret) return ret; return count; } static struct kobj_attribute numa_demotion_enabled_attr = __ATTR(demotion_enabled, 0644, numa_demotion_enabled_show, numa_demotion_enabled_store); static struct attribute *numa_attrs[] = { &numa_demotion_enabled_attr.attr, NULL, }; static const struct attribute_group numa_attr_group = { .attrs = numa_attrs, }; static int __init numa_init_sysfs(void) { int err; struct kobject *numa_kobj; numa_kobj = kobject_create_and_add("numa", mm_kobj); if (!numa_kobj) { pr_err("failed to create numa kobject\n"); return -ENOMEM; } err = sysfs_create_group(numa_kobj, &numa_attr_group); if (err) { pr_err("failed to register numa group\n"); goto delete_obj; } return 0; delete_obj: kobject_put(numa_kobj); return err; } subsys_initcall(numa_init_sysfs); #endif /* CONFIG_SYSFS */ #endif