summaryrefslogtreecommitdiff
path: root/fs/afs/addr_list.c
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2017-11-02 15:27:50 +0000
committerDavid Howells <dhowells@redhat.com>2017-11-13 15:38:18 +0000
commit8b2a464ced77fe35be72ab7d38152a9439daf8d3 (patch)
treeec478c8adebb6344513581b60935c0cf2b0ff937 /fs/afs/addr_list.c
parent989782dcdc91a5e6d5999c7a52a84a60a0811e56 (diff)
downloadlwn-8b2a464ced77fe35be72ab7d38152a9439daf8d3.tar.gz
lwn-8b2a464ced77fe35be72ab7d38152a9439daf8d3.zip
afs: Add an address list concept
Add an RCU replaceable address list structure to hold a list of server addresses. The list also holds the To this end: (1) A cell's VL server address list can be loaded directly via insmod or echo to /proc/fs/afs/cells or dynamically from a DNS query for AFSDB or SRV records. (2) Anyone wanting to use a cell's VL server address must wait until the cell record comes online and has tried to obtain some addresses. (3) An FS server's address list, for the moment, has a single entry that is the key to the server list. This will change in the future when a server is instead keyed on its UUID and the VL.GetAddrsU operation is used. (4) An 'address cursor' concept is introduced to handle iteration through the address list. This is passed to the afs_make_call() as, in the future, stuff (such as abort code) that doesn't outlast the call will be returned in it. In the future, we might want to annotate the list with information about how each address fares. We might then want to propagate such annotations over address list replacement. Whilst we're at it, we allow IPv6 addresses to be specified in colon-delimited lists by enclosing them in square brackets. Signed-off-by: David Howells <dhowells@redhat.com>
Diffstat (limited to 'fs/afs/addr_list.c')
-rw-r--r--fs/afs/addr_list.c308
1 files changed, 308 insertions, 0 deletions
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
new file mode 100644
index 000000000000..ecb9c72aebd2
--- /dev/null
+++ b/fs/afs/addr_list.c
@@ -0,0 +1,308 @@
+/* Server address list management
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/dns_resolver.h>
+#include <linux/inet.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+#include "afs_fs.h"
+
+#define AFS_MAX_ADDRESSES \
+ ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) / \
+ sizeof(struct sockaddr_rxrpc)))
+
+/*
+ * Release an address list.
+ */
+void afs_put_addrlist(struct afs_addr_list *alist)
+{
+ if (alist && refcount_dec_and_test(&alist->usage))
+ call_rcu(&alist->rcu, (rcu_callback_t)kfree);
+}
+
+/*
+ * Allocate an address list.
+ */
+struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
+ unsigned short service,
+ unsigned short port)
+{
+ struct afs_addr_list *alist;
+ unsigned int i;
+
+ _enter("%u,%u,%u", nr, service, port);
+
+ alist = kzalloc(sizeof(*alist) + sizeof(alist->addrs[0]) * nr,
+ GFP_KERNEL);
+ if (!alist)
+ return NULL;
+
+ refcount_set(&alist->usage, 1);
+
+ for (i = 0; i < nr; i++) {
+ struct sockaddr_rxrpc *srx = &alist->addrs[i];
+ srx->srx_family = AF_RXRPC;
+ srx->srx_service = service;
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin6);
+ srx->transport.sin6.sin6_family = AF_INET6;
+ srx->transport.sin6.sin6_port = htons(port);
+ }
+
+ return alist;
+}
+
+/*
+ * Parse a text string consisting of delimited addresses.
+ */
+struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
+ char delim,
+ unsigned short service,
+ unsigned short port)
+{
+ struct afs_addr_list *alist;
+ const char *p, *end = text + len;
+ unsigned int nr = 0;
+
+ _enter("%*.*s,%c", (int)len, (int)len, text, delim);
+
+ if (!len)
+ return ERR_PTR(-EDESTADDRREQ);
+
+ if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
+ delim = ',';
+
+ /* Count the addresses */
+ p = text;
+ do {
+ if (!*p)
+ return ERR_PTR(-EINVAL);
+ if (*p == delim)
+ continue;
+ nr++;
+ if (*p == '[') {
+ p++;
+ if (p == end)
+ return ERR_PTR(-EINVAL);
+ p = memchr(p, ']', end - p);
+ if (!p)
+ return ERR_PTR(-EINVAL);
+ p++;
+ if (p >= end)
+ break;
+ }
+
+ p = memchr(p, delim, end - p);
+ if (!p)
+ break;
+ p++;
+ } while (p < end);
+
+ _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
+ if (nr > AFS_MAX_ADDRESSES)
+ nr = AFS_MAX_ADDRESSES;
+
+ alist = afs_alloc_addrlist(nr, service, port);
+ if (!alist)
+ return ERR_PTR(-ENOMEM);
+
+ /* Extract the addresses */
+ p = text;
+ do {
+ struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
+ char tdelim = delim;
+
+ if (*p == delim) {
+ p++;
+ continue;
+ }
+
+ if (*p == '[') {
+ p++;
+ tdelim = ']';
+ }
+
+ if (in4_pton(p, end - p,
+ (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
+ tdelim, &p)) {
+ srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
+ srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
+ srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+ } else if (in6_pton(p, end - p,
+ srx->transport.sin6.sin6_addr.s6_addr,
+ tdelim, &p)) {
+ /* Nothing to do */
+ } else {
+ goto bad_address;
+ }
+
+ if (tdelim == ']') {
+ if (p == end || *p != ']')
+ goto bad_address;
+ p++;
+ }
+
+ if (p < end) {
+ if (*p == '+') {
+ /* Port number specification "+1234" */
+ unsigned int xport = 0;
+ p++;
+ if (p >= end || !isdigit(*p))
+ goto bad_address;
+ do {
+ xport *= 10;
+ xport += *p - '0';
+ if (xport > 65535)
+ goto bad_address;
+ p++;
+ } while (p < end && isdigit(*p));
+ srx->transport.sin6.sin6_port = htons(xport);
+ } else if (*p == delim) {
+ p++;
+ } else {
+ goto bad_address;
+ }
+ }
+
+ alist->nr_addrs++;
+ } while (p < end && alist->nr_addrs < AFS_MAX_ADDRESSES);
+
+ _leave(" = [nr %u]", alist->nr_addrs);
+ return alist;
+
+bad_address:
+ kfree(alist);
+ return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Compare old and new address lists to see if there's been any change.
+ * - How to do this in better than O(Nlog(N)) time?
+ * - We don't really want to sort the address list, but would rather take the
+ * list as we got it so as not to undo record rotation by the DNS server.
+ */
+#if 0
+static int afs_cmp_addr_list(const struct afs_addr_list *a1,
+ const struct afs_addr_list *a2)
+{
+}
+#endif
+
+/*
+ * Perform a DNS query for VL servers and build a up an address list.
+ */
+struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
+{
+ struct afs_addr_list *alist;
+ char *vllist = NULL;
+ int ret;
+
+ _enter("%s", cell->name);
+
+ ret = dns_query("afsdb", cell->name, cell->name_len,
+ "ipv4", &vllist, _expiry);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ alist = afs_parse_text_addrs(vllist, strlen(vllist), ',',
+ VL_SERVICE, AFS_VL_PORT);
+ if (IS_ERR(alist)) {
+ kfree(vllist);
+ if (alist != ERR_PTR(-ENOMEM))
+ pr_err("Failed to parse DNS data\n");
+ return alist;
+ }
+
+ kfree(vllist);
+ return alist;
+}
+
+/*
+ * Get an address to try.
+ */
+bool afs_iterate_addresses(struct afs_addr_cursor *ac)
+{
+ _enter("%hu+%hd", ac->start, (short)ac->index);
+
+ if (!ac->alist)
+ return false;
+
+ if (ac->begun) {
+ ac->index++;
+ if (ac->index == ac->alist->nr_addrs)
+ ac->index = 0;
+
+ if (ac->index == ac->start) {
+ ac->error = -EDESTADDRREQ;
+ return false;
+ }
+ }
+
+ ac->begun = true;
+ ac->responded = false;
+ ac->addr = &ac->alist->addrs[ac->index];
+ return true;
+}
+
+/*
+ * Release an address list cursor.
+ */
+int afs_end_cursor(struct afs_addr_cursor *ac)
+{
+ if (ac->responded && ac->index != ac->start)
+ WRITE_ONCE(ac->alist->index, ac->index);
+
+ afs_put_addrlist(ac->alist);
+ ac->alist = NULL;
+ return ac->error;
+}
+
+/*
+ * Set the address cursor for iterating over VL servers.
+ */
+int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell)
+{
+ struct afs_addr_list *alist;
+ int ret;
+
+ if (!rcu_access_pointer(cell->vl_addrs)) {
+ ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
+ TASK_INTERRUPTIBLE);
+ if (ret < 0)
+ return ret;
+
+ if (!rcu_access_pointer(cell->vl_addrs) &&
+ ktime_get_real_seconds() < cell->dns_expiry)
+ return cell->error;
+ }
+
+ read_lock(&cell->vl_addrs_lock);
+ alist = rcu_dereference_protected(cell->vl_addrs,
+ lockdep_is_held(&cell->vl_addrs_lock));
+ if (alist->nr_addrs > 0)
+ afs_get_addrlist(alist);
+ else
+ alist = NULL;
+ read_unlock(&cell->vl_addrs_lock);
+
+ if (!alist)
+ return -EDESTADDRREQ;
+
+ ac->alist = alist;
+ ac->addr = NULL;
+ ac->start = READ_ONCE(alist->index);
+ ac->index = ac->start;
+ ac->error = 0;
+ ac->begun = false;
+ return 0;
+}