39 files changed, 1216 insertions, 475 deletions
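The most far-reaching change in this batch is the lib/random32.c rewrite near the end of the diff: the per-CPU net_rand_state behind prandom_u32() becomes a SipHash-derived generator, as the long comment added there explains. As a minimal stand-alone sketch of that construction (two PRND_SIPROUND iterations per 32-bit word of output, noise XORed into v3 before and into v0 after the rounds, and v1 + v3 returned), the following userspace C compiles and runs; the ROL64 helper, the fixed seed, the caller-supplied noise parameter, and the main() harness are illustrative assumptions, while the round macro and the siprand_u32() body mirror the patch below.

/*
 * Hedged userspace sketch of the SipHash-style PRNG added to
 * lib/random32.c (64-bit variant). The kernel keeps one state per
 * CPU, folds in net_rand_noise, and reseeds from get_random_long();
 * here a fixed seed and a caller-supplied noise word stand in.
 */
#include <stdint.h>
#include <stdio.h>

#define ROL64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))

/* Same round function as PRND_SIPROUND in include/linux/prandom.h. */
#define PRND_SIPROUND(v0, v1, v2, v3) ( \
	v0 += v1, v1 = ROL64(v1, 13), v2 += v3, v3 = ROL64(v3, 16), \
	v1 ^= v0, v0 = ROL64(v0, 32), v3 ^= v2, \
	v0 += v3, v3 = ROL64(v3, 21), v2 += v1, v1 = ROL64(v1, 17), \
	v3 ^= v0, v1 ^= v2, v2 = ROL64(v2, 32) \
)

struct siprand_state { uint64_t v0, v1, v2, v3; };

/* Two rounds per 32-bit output word; see the rationale in the patch. */
static uint32_t siprand_u32(struct siprand_state *s, uint64_t noise)
{
	uint64_t v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3;

	v3 ^= noise;
	PRND_SIPROUND(v0, v1, v2, v3);
	PRND_SIPROUND(v0, v1, v2, v3);
	v0 ^= noise;
	s->v0 = v0; s->v1 = v1; s->v2 = v2; s->v3 = v3;
	return (uint32_t)(v1 + v3);
}

int main(void)
{
	struct siprand_state s = { 1, 2, 3, 4 };	/* arbitrary nonzero seed */
	int i;

	for (i = 0; i < 4; i++)
		printf("%08x\n", (unsigned int)siprand_u32(&s, 0));
	return 0;
}

Returning v1 + v3 instead of v0 ^ v1 ^ v2 ^ v3 is deliberate: the last operations of a round are "v3 ^= v0" and "v1 ^= v2", so the XOR form would partially undo them, as the comment in the patch notes.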
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback index ecb7942ff146..ac2947b98950 100644 --- a/Documentation/ABI/testing/sysfs-driver-xen-blkback +++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback @@ -35,3 +35,12 @@ Description: controls the duration in milliseconds that blkback will not cache any page not backed by a grant mapping. The default is 10ms. + +What: /sys/module/xen_blkback/parameters/feature_persistent +Date: September 2020 +KernelVersion: 5.10 +Contact: SeongJae Park <sjpark@amazon.de> +Description: + Whether to enable the persistent grants feature or not. Note + that this option only takes effect on newly created backends. + The default is Y (enable). diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkfront b/Documentation/ABI/testing/sysfs-driver-xen-blkfront index c0a6cb7eb314..28008905615f 100644 --- a/Documentation/ABI/testing/sysfs-driver-xen-blkfront +++ b/Documentation/ABI/testing/sysfs-driver-xen-blkfront @@ -1,4 +1,4 @@ -What: /sys/module/xen_blkfront/parameters/max +What: /sys/module/xen_blkfront/parameters/max_indirect_segments Date: June 2013 KernelVersion: 3.11 Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> @@ -8,3 +8,12 @@ Description: is 32 - higher value means more potential throughput but more memory usage. The backend picks the minimum of the frontend and its default backend value. + +What: /sys/module/xen_blkfront/parameters/feature_persistent +Date: September 2020 +KernelVersion: 5.10 +Contact: SeongJae Park <sjpark@amazon.de> +Description: + Whether to enable the persistent grants feature or not. Note + that this option only takes effect on newly created frontends. + The default is Y (enable). diff --git a/Documentation/admin-guide/LSM/SafeSetID.rst b/Documentation/admin-guide/LSM/SafeSetID.rst index 7bff07ce4fdd..17996c9070e2 100644 --- a/Documentation/admin-guide/LSM/SafeSetID.rst +++ b/Documentation/admin-guide/LSM/SafeSetID.rst @@ -3,9 +3,9 @@ SafeSetID ========= SafeSetID is an LSM module that gates the setid family of syscalls to restrict UID/GID transitions from a given UID/GID to only those approved by a -system-wide whitelist. These restrictions also prohibit the given UIDs/GIDs +system-wide allowlist. These restrictions also prohibit the given UIDs/GIDs from obtaining auxiliary privileges associated with CAP_SET{U/G}ID, such as -allowing a user to set up user namespace UID mappings. +allowing a user to set up user namespace UID/GID mappings. Background @@ -98,10 +98,21 @@ Directions for use ================== This LSM hooks the setid syscalls to make sure transitions are allowed if an applicable restriction policy is in place. Policies are configured through -securityfs by writing to the safesetid/add_whitelist_policy and -safesetid/flush_whitelist_policies files at the location where securityfs is -mounted. The format for adding a policy is '<UID>:<UID>', using literal -numbers, such as '123:456'. To flush the policies, any write to the file is -sufficient. Again, configuring a policy for a UID will prevent that UID from -obtaining auxiliary setid privileges, such as allowing a user to set up user -namespace UID mappings. +securityfs by writing to the safesetid/uid_allowlist_policy and +safesetid/gid_allowlist_policy files at the location where securityfs is +mounted. The format for adding a policy is '<UID>:<UID>' or '<GID>:<GID>', +using literal numbers, and ending with a newline character such as '123:456\n'. 
+Writing an empty string "" will flush the policy. Again, configuring a policy +for a UID/GID will prevent that UID/GID from obtaining auxiliary setid +privileges, such as allowing a user to set up user namespace UID/GID mappings. + +Note on GID policies and setgroups() +==================================== +In v5.9 we are adding support for limiting CAP_SETGID privileges as was done +previously for CAP_SETUID. However, for compatibility with common sandboxing +related code conventions in userspace, we currently allow arbitrary +setgroups() calls for processes with CAP_SETGID restrictions. Until we add +support in a future release for restricting setgroups() calls, these GID +policies add no meaningful security. setgroups() restrictions will be enforced +once we have the policy checking code in place, which will rely on GID policy +configuration code added in v5.9. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 02d4adbf98d2..526d65d8573a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5978,6 +5978,13 @@ After which time (jiffies) the event handling loop should start to delay EOI handling. Default is 2. + xen.fifo_events= [XEN] + Boolean parameter to disable using fifo event handling + even if available. Normally fifo event handling is + preferred over the 2-level event handling, as it is + fairer and the number of possible event channels is + much higher. Default is on (use fifo events). + nopv= [X86,XEN,KVM,HYPER_V,VMWARE] Disables the PV optimizations forcing the guest to run as generic guest with no PV drivers. Currently support diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 5d458a44b09c..9549496f5523 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -6,7 +6,7 @@ * Copyright (C) 1999-2003 Matthew Wilcox <willy at parisc-linux.org> * Copyright (C) 2000-2003 Paul Bame <bame at parisc-linux.org> * Copyright (C) 2001 Thomas Bogendoerfer <tsbogend at parisc-linux.org> - * Copyright (C) 1999-2014 Helge Deller <deller@gmx.de> + * Copyright (C) 1999-2020 Helge Deller <deller@gmx.de> */ #include <linux/uaccess.h> @@ -23,6 +23,7 @@ #include <linux/utsname.h> #include <linux/personality.h> #include <linux/random.h> +#include <linux/compat.h> /* we construct an artificial offset for the mapping based on the physical * address of the kernel mapping variable */ @@ -373,3 +374,73 @@ long parisc_personality(unsigned long personality) return err; } + +/* + * Up to kernel v5.9 we defined O_NONBLOCK as 000200004, + * since then O_NONBLOCK is defined as 000200000. + * + * The following wrapper functions mask out the old + * O_NDELAY bit from calls which use O_NONBLOCK. + * + * XXX: Remove those in year 2022 (or later)?
+ */ + +#define O_NONBLOCK_OLD 000200004 +#define O_NONBLOCK_MASK_OUT (O_NONBLOCK_OLD & ~O_NONBLOCK) + +static int FIX_O_NONBLOCK(int flags) +{ + if (flags & O_NONBLOCK_MASK_OUT) { + struct task_struct *tsk = current; + pr_warn_once("%s(%d) uses a deprecated O_NONBLOCK value.\n", + tsk->comm, tsk->pid); + } + return flags & ~O_NONBLOCK_MASK_OUT; +} + +asmlinkage long parisc_timerfd_create(int clockid, int flags) +{ + flags = FIX_O_NONBLOCK(flags); + return sys_timerfd_create(clockid, flags); +} + +asmlinkage long parisc_signalfd4(int ufd, sigset_t __user *user_mask, + size_t sizemask, int flags) +{ + flags = FIX_O_NONBLOCK(flags); + return sys_signalfd4(ufd, user_mask, sizemask, flags); +} + +#ifdef CONFIG_COMPAT +asmlinkage long parisc_compat_signalfd4(int ufd, + compat_sigset_t __user *user_mask, + compat_size_t sizemask, int flags) +{ + flags = FIX_O_NONBLOCK(flags); + return compat_sys_signalfd4(ufd, user_mask, sizemask, flags); +} +#endif + +asmlinkage long parisc_eventfd2(unsigned int count, int flags) +{ + flags = FIX_O_NONBLOCK(flags); + return sys_eventfd2(count, flags); +} + +asmlinkage long parisc_userfaultfd(int flags) +{ + flags = FIX_O_NONBLOCK(flags); + return sys_userfaultfd(flags); +} + +asmlinkage long parisc_pipe2(int __user *fildes, int flags) +{ + flags = FIX_O_NONBLOCK(flags); + return sys_pipe2(fildes, flags); +} + +asmlinkage long parisc_inotify_init1(int flags) +{ + flags = FIX_O_NONBLOCK(flags); + return sys_inotify_init1(flags); +} diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 38c63e5404bc..f375ea528e59 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -344,17 +344,17 @@ 304 common eventfd sys_eventfd 305 32 fallocate parisc_fallocate 305 64 fallocate sys_fallocate -306 common timerfd_create sys_timerfd_create +306 common timerfd_create parisc_timerfd_create 307 32 timerfd_settime sys_timerfd_settime32 307 64 timerfd_settime sys_timerfd_settime 308 32 timerfd_gettime sys_timerfd_gettime32 308 64 timerfd_gettime sys_timerfd_gettime -309 common signalfd4 sys_signalfd4 compat_sys_signalfd4 -310 common eventfd2 sys_eventfd2 +309 common signalfd4 parisc_signalfd4 parisc_compat_signalfd4 +310 common eventfd2 parisc_eventfd2 311 common epoll_create1 sys_epoll_create1 312 common dup3 sys_dup3 -313 common pipe2 sys_pipe2 -314 common inotify_init1 sys_inotify_init1 +313 common pipe2 parisc_pipe2 +314 common inotify_init1 parisc_inotify_init1 315 common preadv sys_preadv compat_sys_preadv 316 common pwritev sys_pwritev compat_sys_pwritev 317 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo @@ -387,7 +387,7 @@ 341 common bpf sys_bpf 342 common execveat sys_execveat compat_sys_execveat 343 common membarrier sys_membarrier -344 common userfaultfd sys_userfaultfd +344 common userfaultfd parisc_userfaultfd 345 common mlock2 sys_mlock2 346 common copy_file_range sys_copy_file_range 347 common preadv2 sys_preadv2 compat_sys_preadv2 diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 04508158815c..13d94f0f94a0 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -180,9 +180,16 @@ static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm) static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm) { time64_t secs = rtc_tm_to_time64(tm); - - if (pdc_tod_set(secs, 0) < 0) + int ret; + + /* hppa has a Y2K38 problem: pdc_tod_set() takes a u32 value!
*/ + ret = pdc_tod_set(secs, 0); + if (ret != 0) { + pr_warn("pdc_tod_set(%lld) returned error %d\n", secs, ret); + if (ret == PDC_INVALID_ARG) + return -EINVAL; return -EOPNOTSUPP; + } return 0; } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 2097fa0ebdb5..c1b2f764b29a 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -88,14 +88,17 @@ int xen_smp_intr_init(unsigned int cpu) per_cpu(xen_callfunc_irq, cpu).irq = rc; per_cpu(xen_callfunc_irq, cpu).name = callfunc_name; - debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); - rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, - IRQF_PERCPU | IRQF_NOBALANCING, - debug_name, NULL); - if (rc < 0) - goto fail; - per_cpu(xen_debug_irq, cpu).irq = rc; - per_cpu(xen_debug_irq, cpu).name = debug_name; + if (!xen_fifo_events) { + debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); + rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, + xen_debug_interrupt, + IRQF_PERCPU | IRQF_NOBALANCING, + debug_name, NULL); + if (rc < 0) + goto fail; + per_cpu(xen_debug_irq, cpu).irq = rc; + per_cpu(xen_debug_irq, cpu).name = debug_name; + } callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 45d556f71858..9546c3384c75 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -29,6 +29,8 @@ extern struct start_info *xen_start_info; extern struct shared_info xen_dummy_shared_info; extern struct shared_info *HYPERVISOR_shared_info; +extern bool xen_fifo_events; + void xen_setup_mfn_list_list(void); void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); diff --git a/drivers/ata/pata_ns87415.c b/drivers/ata/pata_ns87415.c index 4b2ba813dcab..1532b2e3c672 100644 --- a/drivers/ata/pata_ns87415.c +++ b/drivers/ata/pata_ns87415.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * pata_ns87415.c - NS87415 (non PARISC) PATA + * pata_ns87415.c - NS87415 (and PARISC SUPERIO 87560) PATA * * (C) 2005 Red Hat <alan@lxorguk.ukuu.org.uk> * @@ -16,7 +16,6 @@ * systems. This has its own special mountain of errata. * * TODO: - * Test PARISC SuperIO * Get someone to test on SPARC * Implement lazy pio/dma switching for better performance * 8bit shared timing. diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 5e7c36d73dc6..f5705569e2a7 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -473,6 +473,12 @@ static void xen_vbd_free(struct xen_vbd *vbd) vbd->bdev = NULL; } +/* Enable the persistent grants feature. 
*/ +static bool feature_persistent = true; +module_param(feature_persistent, bool, 0644); +MODULE_PARM_DESC(feature_persistent, + "Enables the persistent grants feature"); + static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, unsigned major, unsigned minor, int readonly, int cdrom) @@ -518,6 +524,8 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, if (q && blk_queue_secure_erase(q)) vbd->discard_secure = true; + vbd->feature_gnt_persistent = feature_persistent; + pr_debug("Successful creation of handle=%04x (dom=%u)\n", handle, blkif->domid); return 0; @@ -905,7 +913,8 @@ again: xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support); - err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1); + err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", + be->blkif->vbd.feature_gnt_persistent); if (err) { xenbus_dev_fatal(dev, err, "writing %s/feature-persistent", dev->nodename); @@ -1066,7 +1075,6 @@ static int connect_ring(struct backend_info *be) { struct xenbus_device *dev = be->dev; struct xen_blkif *blkif = be->blkif; - unsigned int pers_grants; char protocol[64] = ""; int err, i; char *xspath; @@ -1092,9 +1100,11 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -ENOSYS; } - pers_grants = xenbus_read_unsigned(dev->otherend, "feature-persistent", - 0); - blkif->vbd.feature_gnt_persistent = pers_grants; + if (blkif->vbd.feature_gnt_persistent) + blkif->vbd.feature_gnt_persistent = + xenbus_read_unsigned(dev->otherend, + "feature-persistent", 0); + blkif->vbd.overflow_max_grants = 0; /* @@ -1117,7 +1127,7 @@ static int connect_ring(struct backend_info *be) pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename, blkif->nr_rings, blkif->blk_protocol, protocol, - pers_grants ? "persistent grants" : ""); + blkif->vbd.feature_gnt_persistent ? "persistent grants" : ""); ring_page_order = xenbus_read_unsigned(dev->otherend, "ring-page-order", 0); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 91de2e0755ae..48629d3433b4 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1866,8 +1866,8 @@ again: message = "writing protocol"; goto abort_transaction; } - err = xenbus_printf(xbt, dev->nodename, - "feature-persistent", "%u", 1); + err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", + info->feature_persistent); if (err) dev_warn(&dev->dev, "writing persistent grants feature to xenbus"); @@ -1941,6 +1941,13 @@ static int negotiate_mq(struct blkfront_info *info) } return 0; } + +/* Enable the persistent grants feature. */ +static bool feature_persistent = true; +module_param(feature_persistent, bool, 0644); +MODULE_PARM_DESC(feature_persistent, + "Enables the persistent grants feature"); + /** * Entry point to this code when a new device is created. Allocate the basic * structures and the ring buffer for communication with the backend, and @@ -2007,6 +2014,8 @@ static int blkfront_probe(struct xenbus_device *dev, info->vdevice = vdevice; info->connected = BLKIF_STATE_DISCONNECTED; + info->feature_persistent = feature_persistent; + /* Front end dir is a number, which is used as the id. 
*/ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); @@ -2316,9 +2325,10 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) if (xenbus_read_unsigned(info->xbdev->otherend, "feature-discard", 0)) blkfront_setup_discard(info); - info->feature_persistent = - !!xenbus_read_unsigned(info->xbdev->otherend, - "feature-persistent", 0); + if (info->feature_persistent) + info->feature_persistent = + !!xenbus_read_unsigned(info->xbdev->otherend, + "feature-persistent", 0); indirect_segments = xenbus_read_unsigned(info->xbdev->otherend, "feature-max-indirect-segments", 0); diff --git a/drivers/char/random.c b/drivers/char/random.c index d20ba1b104ca..2a41b21623ae 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1277,7 +1277,6 @@ void add_interrupt_randomness(int irq, int irq_flags) fast_mix(fast_pool); add_interrupt_bench(cycles); - this_cpu_add(net_rand_state.s1, fast_pool->pool[cycles & 3]); if (unlikely(crng_init == 0)) { if ((fast_pool->count >= 64) && diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c index 65f4e9d62a67..d36e89d6fc54 100644 --- a/drivers/input/serio/hil_mlc.c +++ b/drivers/input/serio/hil_mlc.c @@ -74,7 +74,7 @@ EXPORT_SYMBOL(hil_mlc_unregister); static LIST_HEAD(hil_mlcs); static DEFINE_RWLOCK(hil_mlcs_lock); static struct timer_list hil_mlcs_kicker; -static int hil_mlcs_probe; +static int hil_mlcs_probe, hil_mlc_stop; static void hil_mlcs_process(unsigned long unused); static DECLARE_TASKLET_DISABLED_OLD(hil_mlcs_tasklet, hil_mlcs_process); @@ -702,9 +702,13 @@ static int hilse_donode(hil_mlc *mlc) if (!mlc->ostarted) { mlc->ostarted = 1; mlc->opacket = pack; - mlc->out(mlc); + rc = mlc->out(mlc); nextidx = HILSEN_DOZE; write_unlock_irqrestore(&mlc->lock, flags); + if (rc) { + hil_mlc_stop = 1; + return 1; + } break; } mlc->ostarted = 0; @@ -715,8 +719,13 @@ static int hilse_donode(hil_mlc *mlc) case HILSE_CTS: write_lock_irqsave(&mlc->lock, flags); - nextidx = mlc->cts(mlc) ? node->bad : node->good; + rc = mlc->cts(mlc); + nextidx = rc ? node->bad : node->good; write_unlock_irqrestore(&mlc->lock, flags); + if (rc) { + hil_mlc_stop = 1; + return 1; + } break; default: @@ -780,6 +789,12 @@ static void hil_mlcs_process(unsigned long unused) static void hil_mlcs_timer(struct timer_list *unused) { + if (hil_mlc_stop) { + /* could not send packet - stop immediately. */ + pr_warn(PREFIX "HIL seems stuck - Disabling HIL MLC.\n"); + return; + } + hil_mlcs_probe = 1; tasklet_schedule(&hil_mlcs_tasklet); /* Re-insert the periodic task. 
*/ diff --git a/drivers/input/serio/hp_sdc_mlc.c b/drivers/input/serio/hp_sdc_mlc.c index 232d30c825bd..3e85e9039374 100644 --- a/drivers/input/serio/hp_sdc_mlc.c +++ b/drivers/input/serio/hp_sdc_mlc.c @@ -210,7 +210,7 @@ static int hp_sdc_mlc_cts(hil_mlc *mlc) priv->tseq[2] = 1; priv->tseq[3] = 0; priv->tseq[4] = 0; - __hp_sdc_enqueue_transaction(&priv->trans); + return __hp_sdc_enqueue_transaction(&priv->trans); busy: return 1; done: @@ -219,7 +219,7 @@ static int hp_sdc_mlc_cts(hil_mlc *mlc) return 0; } -static void hp_sdc_mlc_out(hil_mlc *mlc) +static int hp_sdc_mlc_out(hil_mlc *mlc) { struct hp_sdc_mlc_priv_s *priv; @@ -234,7 +234,7 @@ static void hp_sdc_mlc_out(hil_mlc *mlc) do_data: if (priv->emtestmode) { up(&mlc->osem); - return; + return 0; } /* Shouldn't be sending commands when loop may be busy */ BUG_ON(down_trylock(&mlc->csem)); @@ -296,7 +296,7 @@ static void hp_sdc_mlc_out(hil_mlc *mlc) BUG_ON(down_trylock(&mlc->csem)); } enqueue: - hp_sdc_enqueue_transaction(&priv->trans); + return hp_sdc_enqueue_transaction(&priv->trans); } static int __init hp_sdc_mlc_init(void) diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index fe5ad0e89cd8..da87f3a1e351 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -47,10 +47,11 @@ static unsigned evtchn_2l_max_channels(void) return EVTCHN_2L_NR_CHANNELS; } -static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu) +static void evtchn_2l_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu, + unsigned int old_cpu) { - clear_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, info->cpu))); - set_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, cpu))); + clear_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, old_cpu))); + set_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, cpu))); } static void evtchn_2l_clear_pending(evtchn_port_t port) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index cc317739e786..6038c4c35db5 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -70,6 +70,57 @@ #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "xen." +/* Interrupt types. */ +enum xen_irq_type { + IRQT_UNBOUND = 0, + IRQT_PIRQ, + IRQT_VIRQ, + IRQT_IPI, + IRQT_EVTCHN +}; + +/* + * Packed IRQ information: + * type - enum xen_irq_type + * event channel - irq->event channel mapping + * cpu - cpu this event channel is bound to + * index - type-specific information: + * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM + * guest, or GSI (real passthrough IRQ) of the device. + * VIRQ - virq number + * IPI - IPI vector + * EVTCHN - + */ +struct irq_info { + struct list_head list; + struct list_head eoi_list; + short refcnt; + short spurious_cnt; + enum xen_irq_type type; /* type */ + unsigned irq; + evtchn_port_t evtchn; /* event channel */ + unsigned short cpu; /* cpu bound */ + unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */ + unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ + u64 eoi_time; /* Time in jiffies when to EOI. */ + + union { + unsigned short virq; + enum ipi_vector ipi; + struct { + unsigned short pirq; + unsigned short gsi; + unsigned char vector; + unsigned char flags; + uint16_t domid; + } pirq; + } u; +}; + +#define PIRQ_NEEDS_EOI (1 << 0) +#define PIRQ_SHAREABLE (1 << 1) +#define PIRQ_MSI_GROUP (1 << 2) + static uint __read_mostly event_loop_timeout = 2; module_param(event_loop_timeout, uint, 0644); @@ -110,7 +161,7 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... 
NR_VIRQS-1] = -1}; /* IRQ <-> IPI mapping */ static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; -int **evtchn_to_irq; +static int **evtchn_to_irq; #ifdef CONFIG_X86 static unsigned long *pirq_eoi_map; #endif @@ -190,7 +241,7 @@ int get_evtchn_to_irq(evtchn_port_t evtchn) } /* Get info for IRQ */ -struct irq_info *info_for_irq(unsigned irq) +static struct irq_info *info_for_irq(unsigned irq) { if (irq < nr_legacy_irqs()) return legacy_info_ptrs[irq]; @@ -228,7 +279,7 @@ static int xen_irq_info_common_setup(struct irq_info *info, irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); - return xen_evtchn_port_setup(info); + return xen_evtchn_port_setup(evtchn); } static int xen_irq_info_evtchn_setup(unsigned irq, @@ -351,7 +402,7 @@ static enum xen_irq_type type_from_irq(unsigned irq) return info_for_irq(irq)->type; } -unsigned cpu_from_irq(unsigned irq) +static unsigned cpu_from_irq(unsigned irq) { return info_for_irq(irq)->cpu; } @@ -391,7 +442,7 @@ static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu) #ifdef CONFIG_SMP cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu)); #endif - xen_evtchn_port_bind_to_cpu(info, cpu); + xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu); info->cpu = cpu; } @@ -745,7 +796,7 @@ static unsigned int __startup_pirq(unsigned int irq) info->evtchn = evtchn; bind_evtchn_to_cpu(evtchn, 0); - rc = xen_evtchn_port_setup(info); + rc = xen_evtchn_port_setup(evtchn); if (rc) goto err; @@ -1145,14 +1196,6 @@ static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain, chip); } -int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - evtchn_port_t remote_port) -{ - return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, - &xen_dynamic_chip); -} -EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq); - int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, evtchn_port_t remote_port) { @@ -1320,19 +1363,6 @@ static int bind_interdomain_evtchn_to_irqhandler_chip( return irq; } -int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - evtchn_port_t remote_port, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, - void *dev_id) -{ - return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, - remote_port, handler, irqflags, devname, - dev_id, &xen_dynamic_chip); -} -EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); - int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, evtchn_port_t remote_port, irq_handler_t handler, @@ -2020,8 +2050,8 @@ void xen_setup_callback_vector(void) {} static inline void xen_alloc_callback_vector(void) {} #endif -static bool fifo_events = true; -module_param(fifo_events, bool, 0); +bool xen_fifo_events = true; +module_param_named(fifo_events, xen_fifo_events, bool, 0); static int xen_evtchn_cpu_prepare(unsigned int cpu) { @@ -2050,10 +2080,12 @@ void __init xen_init_IRQ(void) int ret = -EINVAL; evtchn_port_t evtchn; - if (fifo_events) + if (xen_fifo_events) ret = xen_evtchn_fifo_init(); - if (ret < 0) + if (ret < 0) { xen_evtchn_2l_init(); + xen_fifo_events = false; + } xen_cpu_init_eoi(smp_processor_id()); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 6085a808da95..b234f1766810 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -138,9 +138,8 @@ static void init_array_page(event_word_t *array_page) array_page[i] = 1 << EVTCHN_FIFO_MASKED; } -static int evtchn_fifo_setup(struct irq_info 
*info) +static int evtchn_fifo_setup(evtchn_port_t port) { - evtchn_port_t port = info->evtchn; unsigned new_array_pages; int ret; @@ -186,7 +185,8 @@ static int evtchn_fifo_setup(struct irq_info *info) return ret; } -static void evtchn_fifo_bind_to_cpu(struct irq_info *info, unsigned cpu) +static void evtchn_fifo_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu, + unsigned int old_cpu) { /* no-op */ } @@ -237,6 +237,9 @@ static bool clear_masked_cond(volatile event_word_t *word) w = *word; do { + if (!(w & (1 << EVTCHN_FIFO_MASKED))) + return true; + if (w & (1 << EVTCHN_FIFO_PENDING)) return false; diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 82937d90d7d7..0a97c0549db7 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -7,65 +7,15 @@ #ifndef __EVENTS_INTERNAL_H__ #define __EVENTS_INTERNAL_H__ -/* Interrupt types. */ -enum xen_irq_type { - IRQT_UNBOUND = 0, - IRQT_PIRQ, - IRQT_VIRQ, - IRQT_IPI, - IRQT_EVTCHN -}; - -/* - * Packed IRQ information: - * type - enum xen_irq_type - * event channel - irq->event channel mapping - * cpu - cpu this event channel is bound to - * index - type-specific information: - * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM - * guest, or GSI (real passthrough IRQ) of the device. - * VIRQ - virq number - * IPI - IPI vector - * EVTCHN - - */ -struct irq_info { - struct list_head list; - struct list_head eoi_list; - short refcnt; - short spurious_cnt; - enum xen_irq_type type; /* type */ - unsigned irq; - evtchn_port_t evtchn; /* event channel */ - unsigned short cpu; /* cpu bound */ - unsigned short eoi_cpu; /* EOI must happen on this cpu */ - unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ - u64 eoi_time; /* Time in jiffies when to EOI. */ - - union { - unsigned short virq; - enum ipi_vector ipi; - struct { - unsigned short pirq; - unsigned short gsi; - unsigned char vector; - unsigned char flags; - uint16_t domid; - } pirq; - } u; -}; - -#define PIRQ_NEEDS_EOI (1 << 0) -#define PIRQ_SHAREABLE (1 << 1) -#define PIRQ_MSI_GROUP (1 << 2) - struct evtchn_loop_ctrl; struct evtchn_ops { unsigned (*max_channels)(void); unsigned (*nr_channels)(void); - int (*setup)(struct irq_info *info); - void (*bind_to_cpu)(struct irq_info *info, unsigned cpu); + int (*setup)(evtchn_port_t port); + void (*bind_to_cpu)(evtchn_port_t evtchn, unsigned int cpu, + unsigned int old_cpu); void (*clear_pending)(evtchn_port_t port); void (*set_pending)(evtchn_port_t port); @@ -83,12 +33,9 @@ struct evtchn_ops { extern const struct evtchn_ops *evtchn_ops; -extern int **evtchn_to_irq; int get_evtchn_to_irq(evtchn_port_t evtchn); void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl); -struct irq_info *info_for_irq(unsigned irq); -unsigned cpu_from_irq(unsigned irq); unsigned int cpu_from_evtchn(evtchn_port_t evtchn); static inline unsigned xen_evtchn_max_channels(void) @@ -100,17 +47,18 @@ static inline unsigned xen_evtchn_max_channels(void) * Do any ABI specific setup for a bound event channel before it can * be unmasked and used. 
*/ -static inline int xen_evtchn_port_setup(struct irq_info *info) +static inline int xen_evtchn_port_setup(evtchn_port_t evtchn) { if (evtchn_ops->setup) - return evtchn_ops->setup(info); + return evtchn_ops->setup(evtchn); return 0; } -static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info, - unsigned cpu) +static inline void xen_evtchn_port_bind_to_cpu(evtchn_port_t evtchn, + unsigned int cpu, + unsigned int old_cpu) { - evtchn_ops->bind_to_cpu(info, cpu); + evtchn_ops->bind_to_cpu(evtchn, cpu, old_cpu); } static inline void clear_evtchn(evtchn_port_t port) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 99b3180c613a..905d03863721 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -156,5 +156,5 @@ extern int cifs_truncate_page(struct address_space *mapping, loff_t from); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.28" +#define CIFS_VERSION "2.29" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b6925aeeb621..484ec2d8c5c9 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -298,6 +298,10 @@ struct smb_version_operations { /* query file data from the server */ int (*query_file_info)(const unsigned int, struct cifs_tcon *, struct cifs_fid *, FILE_ALL_INFO *); + /* query reparse tag from srv to determine which type of special file */ + int (*query_reparse_tag)(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const char *path, + __u32 *reparse_tag); /* get server index number */ int (*get_srv_inum)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index daec31be8571..9ee5f304592f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -656,7 +656,7 @@ smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct smb311_posix_qinfo * static void cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, struct super_block *sb, bool adjust_tz, - bool symlink) + bool symlink, u32 reparse_tag) { struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); @@ -684,8 +684,22 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_createtime = le64_to_cpu(info->CreationTime); fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); - - if (symlink) { + if (reparse_tag == IO_REPARSE_TAG_LX_SYMLINK) { + fattr->cf_mode |= S_IFLNK | cifs_sb->mnt_file_mode; + fattr->cf_dtype = DT_LNK; + } else if (reparse_tag == IO_REPARSE_TAG_LX_FIFO) { + fattr->cf_mode |= S_IFIFO | cifs_sb->mnt_file_mode; + fattr->cf_dtype = DT_FIFO; + } else if (reparse_tag == IO_REPARSE_TAG_AF_UNIX) { + fattr->cf_mode |= S_IFSOCK | cifs_sb->mnt_file_mode; + fattr->cf_dtype = DT_SOCK; + } else if (reparse_tag == IO_REPARSE_TAG_LX_CHR) { + fattr->cf_mode |= S_IFCHR | cifs_sb->mnt_file_mode; + fattr->cf_dtype = DT_CHR; + } else if (reparse_tag == IO_REPARSE_TAG_LX_BLK) { + fattr->cf_mode |= S_IFBLK | cifs_sb->mnt_file_mode; + fattr->cf_dtype = DT_BLK; + } else if (symlink) { /* TODO add more reparse tag checks */ fattr->cf_mode = S_IFLNK; fattr->cf_dtype = DT_LNK; } else if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { @@ -740,8 +754,9 @@ cifs_get_file_info(struct file *filp) rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data); switch (rc) { case 0: + /* TODO: add support to query reparse tag */ cifs_all_info_to_fattr(&fattr, &find_data, inode->i_sb, false, - false); + false, 0 /* no reparse tag */); break; case 
-EREMOTE: cifs_create_dfs_fattr(&fattr, inode->i_sb); @@ -910,12 +925,13 @@ cifs_get_inode_info(struct inode **inode, struct cifs_sb_info *cifs_sb = CIFS_SB(sb); bool adjust_tz = false; struct cifs_fattr fattr = {0}; - bool symlink = false; + bool is_reparse_point = false; FILE_ALL_INFO *data = in_data; FILE_ALL_INFO *tmp_data = NULL; void *smb1_backup_rsp_buf = NULL; int rc = 0; int tmprc = 0; + __u32 reparse_tag = 0; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) @@ -938,8 +954,8 @@ cifs_get_inode_info(struct inode **inode, goto out; } rc = server->ops->query_path_info(xid, tcon, cifs_sb, - full_path, tmp_data, - &adjust_tz, &symlink); + full_path, tmp_data, + &adjust_tz, &is_reparse_point); data = tmp_data; } @@ -949,7 +965,19 @@ cifs_get_inode_info(struct inode **inode, switch (rc) { case 0: - cifs_all_info_to_fattr(&fattr, data, sb, adjust_tz, symlink); + /* + * If the file is a reparse point, it is more complicated + * since we have to check if its reparse tag matches a known + * special file type e.g. symlink or fifo or char etc. + */ + if ((le32_to_cpu(data->Attributes) & ATTR_REPARSE) && + server->ops->query_reparse_tag) { + rc = server->ops->query_reparse_tag(xid, tcon, cifs_sb, + full_path, &reparse_tag); + cifs_dbg(FYI, "reparse tag 0x%x\n", reparse_tag); + } + cifs_all_info_to_fattr(&fattr, data, sb, adjust_tz, + is_reparse_point, reparse_tag); break; case -EREMOTE: /* DFS link, no metadata available on this server */ diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index df6212e55e10..1f900b81c34a 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -506,7 +506,7 @@ move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src) int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - FILE_ALL_INFO *data, bool *adjust_tz, bool *symlink) + FILE_ALL_INFO *data, bool *adjust_tz, bool *reparse) { int rc; struct smb2_file_all_info *smb2_data; @@ -516,7 +516,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cached_fid *cfid = NULL; *adjust_tz = false; - *symlink = false; + *reparse = false; smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, GFP_KERNEL); @@ -548,7 +548,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, smb2_data, SMB2_OP_QUERY_INFO, cfile); if (rc == -EOPNOTSUPP) { - *symlink = true; + *reparse = true; create_options |= OPEN_REPARSE_POINT; /* Failed on a symbolic link - query a reparse point info */ @@ -570,7 +570,7 @@ out: int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - struct smb311_posix_qinfo *data, bool *adjust_tz, bool *symlink) + struct smb311_posix_qinfo *data, bool *adjust_tz, bool *reparse) { int rc; __u32 create_options = 0; @@ -578,7 +578,7 @@ smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct smb311_posix_qinfo *smb2_data; *adjust_tz = false; - *symlink = false; + *reparse = false; /* BB TODO: Make struct larger when add support for parsing owner SIDs */ smb2_data = kzalloc(sizeof(struct smb311_posix_qinfo), @@ -599,7 +599,7 @@ smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, ACL_NO_MODE, smb2_data, SMB2_OP_POSIX_QUERY_INFO, cfile); if (rc == -EOPNOTSUPP) { /* BB TODO: When support for special files added to Samba re-verify this path */ - *symlink = true; + *reparse = true; 
create_options |= OPEN_REPARSE_POINT; /* Failed on a symbolic link - query a reparse point info */ diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 3cde719ec41b..504766cb6c19 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3034,6 +3034,133 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +int +smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const char *full_path, + __u32 *tag) +{ + int rc; + __le16 *utf16_path = NULL; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + struct cifs_open_parms oparms; + struct cifs_fid fid; + struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses); + int flags = 0; + struct smb_rqst rqst[3]; + int resp_buftype[3]; + struct kvec rsp_iov[3]; + struct kvec open_iov[SMB2_CREATE_IOV_SIZE]; + struct kvec io_iov[SMB2_IOCTL_IOV_SIZE]; + struct kvec close_iov[1]; + struct smb2_ioctl_rsp *ioctl_rsp; + struct reparse_data_buffer *reparse_buf; + u32 plen; + + cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + + if (smb3_encryption_required(tcon)) + flags |= CIFS_TRANSFORM_REQ; + + memset(rqst, 0, sizeof(rqst)); + resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER; + memset(rsp_iov, 0, sizeof(rsp_iov)); + + utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); + if (!utf16_path) + return -ENOMEM; + + /* + * setup smb2open - TODO add optimization to call cifs_get_readable_path + * to see if there is a handle already open that we can use + */ + memset(&open_iov, 0, sizeof(open_iov)); + rqst[0].rq_iov = open_iov; + rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE; + + memset(&oparms, 0, sizeof(oparms)); + oparms.tcon = tcon; + oparms.desired_access = FILE_READ_ATTRIBUTES; + oparms.disposition = FILE_OPEN; + oparms.create_options = cifs_create_options(cifs_sb, OPEN_REPARSE_POINT); + oparms.fid = &fid; + oparms.reconnect = false; + + rc = SMB2_open_init(tcon, server, + &rqst[0], &oplock, &oparms, utf16_path); + if (rc) + goto query_rp_exit; + smb2_set_next_command(tcon, &rqst[0]); + + + /* IOCTL */ + memset(&io_iov, 0, sizeof(io_iov)); + rqst[1].rq_iov = io_iov; + rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE; + + rc = SMB2_ioctl_init(tcon, server, + &rqst[1], fid.persistent_fid, + fid.volatile_fid, FSCTL_GET_REPARSE_POINT, + true /* is_fctl */, NULL, 0, + CIFSMaxBufSize - + MAX_SMB2_CREATE_RESPONSE_SIZE - + MAX_SMB2_CLOSE_RESPONSE_SIZE); + if (rc) + goto query_rp_exit; + + smb2_set_next_command(tcon, &rqst[1]); + smb2_set_related(&rqst[1]); + + + /* Close */ + memset(&close_iov, 0, sizeof(close_iov)); + rqst[2].rq_iov = close_iov; + rqst[2].rq_nvec = 1; + + rc = SMB2_close_init(tcon, server, + &rqst[2], COMPOUND_FID, COMPOUND_FID, false); + if (rc) + goto query_rp_exit; + + smb2_set_related(&rqst[2]); + + rc = compound_send_recv(xid, tcon->ses, server, + flags, 3, rqst, + resp_buftype, rsp_iov); + + ioctl_rsp = rsp_iov[1].iov_base; + + /* + * Open was successful and we got an ioctl response. 
+ */ + if (rc == 0) { + /* See MS-FSCC 2.3.23 */ + + reparse_buf = (struct reparse_data_buffer *) + ((char *)ioctl_rsp + + le32_to_cpu(ioctl_rsp->OutputOffset)); + plen = le32_to_cpu(ioctl_rsp->OutputCount); + + if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) > + rsp_iov[1].iov_len) { + cifs_tcon_dbg(FYI, "srv returned invalid ioctl len: %d\n", + plen); + rc = -EIO; + goto query_rp_exit; + } + *tag = le32_to_cpu(reparse_buf->ReparseTag); + } + + query_rp_exit: + kfree(utf16_path); + SMB2_open_free(&rqst[0]); + SMB2_ioctl_free(&rqst[1]); + SMB2_close_free(&rqst[2]); + free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); + free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); + free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base); + return rc; +} + static struct cifs_ntsd * get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb, const struct cifs_fid *cifsfid, u32 *pacllen) @@ -4986,6 +5113,8 @@ struct smb_version_operations smb30_operations = { .can_echo = smb2_can_echo, .echo = SMB2_echo, .query_path_info = smb2_query_path_info, + /* WSL tags introduced long after smb2.1, enable for SMB3, 3.11 only */ + .query_reparse_tag = smb2_query_reparse_tag, .get_srv_inum = smb2_get_srv_inum, .query_file_info = smb2_query_file_info, .set_path_size = smb2_set_path_size, @@ -5097,6 +5226,7 @@ struct smb_version_operations smb311_operations = { .can_echo = smb2_can_echo, .echo = SMB2_echo, .query_path_info = smb2_query_path_info, + .query_reparse_tag = smb2_query_reparse_tag, .get_srv_inum = smb2_get_srv_inum, .query_file_info = smb2_query_file_info, .set_path_size = smb2_set_path_size, diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 171f54965703..f05f9b12f689 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -999,6 +999,31 @@ struct copychunk_ioctl_rsp { __le32 TotalBytesWritten; } __packed; +/* See MS-FSCC 2.3.29 and 2.3.30 */ +struct get_retrieval_pointer_count_req { + __le64 StartingVcn; /* virtual cluster number (signed) */ +} __packed; + +struct get_retrieval_pointer_count_rsp { + __le32 ExtentCount; +} __packed; + +/* + * See MS-FSCC 2.3.33 and 2.3.34 + * request is the same as get_retrieval_point_count_req struct above + */ +struct smb3_extents { + __le64 NextVcn; + __le64 Lcn; /* logical cluster number */ +} __packed; + +struct get_retrieval_pointers_refcount_rsp { + __le32 ExtentCount; + __u32 Reserved; + __le64 StartingVcn; + struct smb3_extents extents[]; +} __packed; + struct fsctl_set_integrity_information_req { __le16 ChecksumAlgorithm; __le16 Reserved; @@ -1640,6 +1665,7 @@ struct smb2_file_rename_info { /* encoding of request for level 10 */ __u64 RootDirectory; /* MBZ for network operations (why says spec?) 
*/ __le32 FileNameLength; char FileName[]; /* New name to be assigned */ + /* padding - overall struct size must be >= 24 so filename + pad >= 6 */ } __packed; /* level 10 Set */ struct smb2_file_link_info { /* encoding of request for level 11 */ @@ -1691,6 +1717,11 @@ struct smb2_file_eof_info { /* encoding of request for level 10 */ __le64 EndOfFile; /* new end of file value */ } __packed; /* level 20 Set */ +struct smb2_file_reparse_point_info { + __le64 IndexNumber; + __le32 Tag; +} __packed; + struct smb2_file_network_open_info { __le64 CreationTime; __le64 LastAccessTime; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 67c50d78caa1..d4110447ee3a 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -77,6 +77,9 @@ extern void close_shroot_lease(struct cached_fid *cfid); extern void close_shroot_lease_locked(struct cached_fid *cfid); extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src); +extern int smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, const char *path, + __u32 *reparse_tag); extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, FILE_ALL_INFO *data, diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index 1ff28529cf4b..a0e84747f567 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -103,6 +103,8 @@ #define FSCTL_SET_ZERO_ON_DEALLOC 0x00090194 /* BB add struct */ #define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */ #define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C +#define FSCTL_GET_RETRIEVAL_POINTERS_AND_REFCOUNT 0x000903d3 +#define FSCTL_GET_RETRIEVAL_POINTER_COUNT 0x0009042b #define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF #define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */ #define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */ diff --git a/include/linux/hil_mlc.h b/include/linux/hil_mlc.h index 774f7d3b8f6a..369221fd5518 100644 --- a/include/linux/hil_mlc.h +++ b/include/linux/hil_mlc.h @@ -103,7 +103,7 @@ struct hilse_node { /* Methods for back-end drivers, e.g. hp_sdc_mlc */ typedef int (hil_mlc_cts) (hil_mlc *mlc); -typedef void (hil_mlc_out) (hil_mlc *mlc); +typedef int (hil_mlc_out) (hil_mlc *mlc); typedef int (hil_mlc_in) (hil_mlc *mlc, suseconds_t timeout); struct hil_mlc_devinfo { diff --git a/include/linux/prandom.h b/include/linux/prandom.h index aa16e6468f91..bbf4b4ad61df 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -16,12 +16,62 @@ void prandom_bytes(void *buf, size_t nbytes); void prandom_seed(u32 seed); void prandom_reseed_late(void); +DECLARE_PER_CPU(unsigned long, net_rand_noise); + +#define PRANDOM_ADD_NOISE(a, b, c, d) \ + prandom_u32_add_noise((unsigned long)(a), (unsigned long)(b), \ + (unsigned long)(c), (unsigned long)(d)) + +#if BITS_PER_LONG == 64 +/* + * The core SipHash round function. Each line can be executed in + * parallel given enough CPU resources. + */ +#define PRND_SIPROUND(v0, v1, v2, v3) ( \ + v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ + v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ + v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ + v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ +) + +#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) +#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) + +#elif BITS_PER_LONG == 32 +/* + * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash. 
+ * This is weaker, but 32-bit machines are not used for high-traffic + * applications, so there is less output for an attacker to analyze. + */ +#define PRND_SIPROUND(v0, v1, v2, v3) ( \ + v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ + v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ + v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ + v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ +) +#define PRND_K0 0x6c796765 +#define PRND_K1 0x74656462 + +#else +#error Unsupported BITS_PER_LONG +#endif + +static inline void prandom_u32_add_noise(unsigned long a, unsigned long b, + unsigned long c, unsigned long d) +{ + /* + * This is not used cryptographically; it's just + * a convenient 4-word hash function. (3 xor, 2 add, 2 rol) + */ + a ^= raw_cpu_read(net_rand_noise); + PRND_SIPROUND(a, b, c, d); + raw_cpu_write(net_rand_noise, d); +} + struct rnd_state { __u32 s1, s2, s3, s4; }; -DECLARE_PER_CPU(struct rnd_state, net_rand_state); - u32 prandom_u32_state(struct rnd_state *state); void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); @@ -67,6 +117,7 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed) state->s2 = __seed(i, 8U); state->s3 = __seed(i, 16U); state->s4 = __seed(i, 128U); + PRANDOM_ADD_NOISE(state, i, 0, 0); } /* Pseudo random number generator from numerical recipes. */ diff --git a/include/xen/events.h b/include/xen/events.h index 3b8155c2ea03..8ec418e30c7f 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -35,16 +35,8 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, unsigned long irqflags, const char *devname, void *dev_id); -int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - evtchn_port_t remote_port); int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, evtchn_port_t remote_port); -int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - evtchn_port_t remote_port, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, - void *dev_id); int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, evtchn_port_t remote_port, irq_handler_t handler, diff --git a/kernel/capability.c b/kernel/capability.c index 7c59b096c98a..de7eac903a2a 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -418,7 +418,7 @@ EXPORT_SYMBOL(ns_capable_noaudit); /** * ns_capable_setid - Determine if the current task has a superior capability * in effect, while signalling that this check is being done from within a - * setid syscall. + * setid or setgroups syscall. 
* @ns: The usernamespace we want the capability in * @cap: The capability to be tested for * diff --git a/kernel/groups.c b/kernel/groups.c index 6ee6691f6839..fe7e6385530e 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -178,7 +178,7 @@ bool may_setgroups(void) { struct user_namespace *user_ns = current_user_ns(); - return ns_capable(user_ns, CAP_SETGID) && + return ns_capable_setid(user_ns, CAP_SETGID) && userns_may_setgroups(user_ns); } diff --git a/kernel/sys.c b/kernel/sys.c index 6401880dff74..84594bcd886e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -373,7 +373,7 @@ long __sys_setregid(gid_t rgid, gid_t egid) if (rgid != (gid_t) -1) { if (gid_eq(old->gid, krgid) || gid_eq(old->egid, krgid) || - ns_capable(old->user_ns, CAP_SETGID)) + ns_capable_setid(old->user_ns, CAP_SETGID)) new->gid = krgid; else goto error; @@ -382,7 +382,7 @@ long __sys_setregid(gid_t rgid, gid_t egid) if (gid_eq(old->gid, kegid) || gid_eq(old->egid, kegid) || gid_eq(old->sgid, kegid) || - ns_capable(old->user_ns, CAP_SETGID)) + ns_capable_setid(old->user_ns, CAP_SETGID)) new->egid = kegid; else goto error; @@ -432,7 +432,7 @@ long __sys_setgid(gid_t gid) old = current_cred(); retval = -EPERM; - if (ns_capable(old->user_ns, CAP_SETGID)) + if (ns_capable_setid(old->user_ns, CAP_SETGID)) new->gid = new->egid = new->sgid = new->fsgid = kgid; else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) new->egid = new->fsgid = kgid; @@ -744,7 +744,7 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) old = current_cred(); retval = -EPERM; - if (!ns_capable(old->user_ns, CAP_SETGID)) { + if (!ns_capable_setid(old->user_ns, CAP_SETGID)) { if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) goto error; @@ -871,7 +871,7 @@ long __sys_setfsgid(gid_t gid) if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || - ns_capable(old->user_ns, CAP_SETGID)) { + ns_capable_setid(old->user_ns, CAP_SETGID)) { if (!gid_eq(kgid, old->fsgid)) { new->fsgid = kgid; if (security_task_fix_setgid(new,old,LSM_SETID_FS) == 0) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index dda05f4b7a1f..de37e33a868d 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1706,6 +1706,8 @@ void update_process_times(int user_tick) { struct task_struct *p = current; + PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0); + /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); run_local_timers(); @@ -1717,13 +1719,6 @@ void update_process_times(int user_tick) scheduler_tick(); if (IS_ENABLED(CONFIG_POSIX_TIMERS)) run_posix_cpu_timers(); - - /* The current CPU might make use of net randoms without receiving IRQs - * to renew them often enough. Let's update the net_rand_state from a - * non-constant value that's not affine to the number of calls to make - * sure it's updated when there's some activity (we don't care in idle). 
- */ - this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick); } /** diff --git a/lib/random32.c b/lib/random32.c index dfb9981ab798..4d0e05e471d7 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -38,19 +38,10 @@ #include <linux/jiffies.h> #include <linux/random.h> #include <linux/sched.h> +#include <linux/bitops.h> #include <asm/unaligned.h> #include <trace/events/random.h> -#ifdef CONFIG_RANDOM32_SELFTEST -static void __init prandom_state_selftest(void); -#else -static inline void prandom_state_selftest(void) -{ -} -#endif - -DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy; - /** * prandom_u32_state - seeded pseudo-random number generator. * @state: pointer to state structure holding seeded state. @@ -71,26 +62,6 @@ u32 prandom_u32_state(struct rnd_state *state) EXPORT_SYMBOL(prandom_u32_state); /** - * prandom_u32 - pseudo random number generator - * - * A 32 bit pseudo-random number is generated using a fast - * algorithm suitable for simulation. This algorithm is NOT - * considered safe for cryptographic use. - */ -u32 prandom_u32(void) -{ - struct rnd_state *state = &get_cpu_var(net_rand_state); - u32 res; - - res = prandom_u32_state(state); - trace_prandom_u32(res); - put_cpu_var(net_rand_state); - - return res; -} -EXPORT_SYMBOL(prandom_u32); - -/** * prandom_bytes_state - get the requested number of pseudo-random bytes * * @state: pointer to state structure holding seeded state. @@ -121,20 +92,6 @@ void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes) } EXPORT_SYMBOL(prandom_bytes_state); -/** - * prandom_bytes - get the requested number of pseudo-random bytes - * @buf: where to copy the pseudo-random bytes to - * @bytes: the requested number of bytes - */ -void prandom_bytes(void *buf, size_t bytes) -{ - struct rnd_state *state = &get_cpu_var(net_rand_state); - - prandom_bytes_state(state, buf, bytes); - put_cpu_var(net_rand_state); -} -EXPORT_SYMBOL(prandom_bytes); - static void prandom_warmup(struct rnd_state *state) { /* Calling RNG ten times to satisfy recurrence condition */ @@ -150,96 +107,6 @@ static void prandom_warmup(struct rnd_state *state) prandom_u32_state(state); } -static u32 __extract_hwseed(void) -{ - unsigned int val = 0; - - (void)(arch_get_random_seed_int(&val) || - arch_get_random_int(&val)); - - return val; -} - -static void prandom_seed_early(struct rnd_state *state, u32 seed, - bool mix_with_hwseed) -{ -#define LCG(x) ((x) * 69069U) /* super-duper LCG */ -#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) - state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); - state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); - state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); - state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); -} - -/** - * prandom_seed - add entropy to pseudo random number generator - * @entropy: entropy value - * - * Add some additional entropy to the prandom pool. - */ -void prandom_seed(u32 entropy) -{ - int i; - /* - * No locking on the CPUs, but then somewhat random results are, well, - * expected. - */ - for_each_possible_cpu(i) { - struct rnd_state *state = &per_cpu(net_rand_state, i); - - state->s1 = __seed(state->s1 ^ entropy, 2U); - prandom_warmup(state); - } -} -EXPORT_SYMBOL(prandom_seed); - -/* - * Generate some initially weak seeding values to allow - * to start the prandom_u32() engine. 
- */ -static int __init prandom_init(void) -{ - int i; - - prandom_state_selftest(); - - for_each_possible_cpu(i) { - struct rnd_state *state = &per_cpu(net_rand_state, i); - u32 weak_seed = (i + jiffies) ^ random_get_entropy(); - - prandom_seed_early(state, weak_seed, true); - prandom_warmup(state); - } - - return 0; -} -core_initcall(prandom_init); - -static void __prandom_timer(struct timer_list *unused); - -static DEFINE_TIMER(seed_timer, __prandom_timer); - -static void __prandom_timer(struct timer_list *unused) -{ - u32 entropy; - unsigned long expires; - - get_random_bytes(&entropy, sizeof(entropy)); - prandom_seed(entropy); - - /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ - expires = 40 + prandom_u32_max(40); - seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC); - - add_timer(&seed_timer); -} - -static void __init __prandom_start_seed_timer(void) -{ - seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC); - add_timer(&seed_timer); -} - void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) { int i; @@ -259,51 +126,6 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) } EXPORT_SYMBOL(prandom_seed_full_state); -/* - * Generate better values after random number generator - * is fully initialized. - */ -static void __prandom_reseed(bool late) -{ - unsigned long flags; - static bool latch = false; - static DEFINE_SPINLOCK(lock); - - /* Asking for random bytes might result in bytes getting - * moved into the nonblocking pool and thus marking it - * as initialized. In this case we would double back into - * this function and attempt to do a late reseed. - * Ignore the pointless attempt to reseed again if we're - * already waiting for bytes when the nonblocking pool - * got initialized. - */ - - /* only allow initial seeding (late == false) once */ - if (!spin_trylock_irqsave(&lock, flags)) - return; - - if (latch && !late) - goto out; - - latch = true; - prandom_seed_full_state(&net_rand_state); -out: - spin_unlock_irqrestore(&lock, flags); -} - -void prandom_reseed_late(void) -{ - __prandom_reseed(true); -} - -static int __init prandom_reseed(void) -{ - __prandom_reseed(false); - __prandom_start_seed_timer(); - return 0; -} -late_initcall(prandom_reseed); - #ifdef CONFIG_RANDOM32_SELFTEST static struct prandom_test1 { u32 seed; @@ -423,7 +245,28 @@ static struct prandom_test2 { { 407983964U, 921U, 728767059U }, }; -static void __init prandom_state_selftest(void) +static u32 __extract_hwseed(void) +{ + unsigned int val = 0; + + (void)(arch_get_random_seed_int(&val) || + arch_get_random_int(&val)); + + return val; +} + +static void prandom_seed_early(struct rnd_state *state, u32 seed, + bool mix_with_hwseed) +{ +#define LCG(x) ((x) * 69069U) /* super-duper LCG */ +#define HWSEED() (mix_with_hwseed ? 
__extract_hwseed() : 0) + state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); + state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); + state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); + state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); +} + +static int __init prandom_state_selftest(void) { int i, j, errors = 0, runs = 0; bool error = false; @@ -463,5 +306,327 @@ static void __init prandom_state_selftest(void) pr_warn("prandom: %d/%d self tests failed\n", errors, runs); else pr_info("prandom: %d self tests passed\n", runs); + return 0; } +core_initcall(prandom_state_selftest); #endif + +/* + * The prandom_u32() implementation is now completely separate from the + * prandom_state() functions, which are retained (for now) for compatibility. + * + * Because of (ab)use in the networking code for choosing random TCP/UDP port + * numbers, which open DoS possibilities if guessable, we want something + * stronger than a standard PRNG. But the performance requirements of + * the network code do not allow robust crypto for this application. + * + * So this is a homebrew Junior Spaceman implementation, based on the + * lowest-latency trustworthy crypto primitive available, SipHash. + * (The authors of SipHash have not been consulted about this abuse of + * their work.) + * + * Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to + * one word of output. This abbreviated version uses 2 rounds per word + * of output. + */ + +struct siprand_state { + unsigned long v0; + unsigned long v1; + unsigned long v2; + unsigned long v3; +}; + +static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy; +DEFINE_PER_CPU(unsigned long, net_rand_noise); +EXPORT_PER_CPU_SYMBOL(net_rand_noise); + +/* + * This is the core CPRNG function. As "pseudorandom", this is not used + * for truly valuable things, just intended to be a PITA to guess. + * For maximum speed, we do just two SipHash rounds per word. This is + * the same rate as 4 rounds per 64 bits that SipHash normally uses, + * so hopefully it's reasonably secure. + * + * There are two changes from the official SipHash finalization: + * - We omit some constants XORed with v2 in the SipHash spec as irrelevant; + * they are there only to make the output rounds distinct from the input + * rounds, and this application has no input rounds. + * - Rather than returning v0^v1^v2^v3, return v1+v3. + * If you look at the SipHash round, the last operation on v3 is + * "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time. + * Likewise "v1 ^= v2". (The rotate of v2 makes a difference, but + * it still cancels out half of the bits in v2 for no benefit.) + * Second, since the last combining operation was xor, continue the + * pattern of alternating xor/add for a tiny bit of extra non-linearity. + */ +static inline u32 siprand_u32(struct siprand_state *s) +{ + unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3; + unsigned long n = raw_cpu_read(net_rand_noise); + + v3 ^= n; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= n; + s->v0 = v0; s->v1 = v1; s->v2 = v2; s->v3 = v3; + return v1 + v3; +} + + +/** + * prandom_u32 - pseudo random number generator + * + * A 32 bit pseudo-random number is generated using a fast + * algorithm suitable for simulation. This algorithm is NOT + * considered safe for cryptographic use. 
+ */ +u32 prandom_u32(void) +{ + struct siprand_state *state = get_cpu_ptr(&net_rand_state); + u32 res = siprand_u32(state); + + trace_prandom_u32(res); + put_cpu_ptr(&net_rand_state); + return res; +} +EXPORT_SYMBOL(prandom_u32); + +/** + * prandom_bytes - get the requested number of pseudo-random bytes + * @buf: where to copy the pseudo-random bytes to + * @bytes: the requested number of bytes + */ +void prandom_bytes(void *buf, size_t bytes) +{ + struct siprand_state *state = get_cpu_ptr(&net_rand_state); + u8 *ptr = buf; + + while (bytes >= sizeof(u32)) { + put_unaligned(siprand_u32(state), (u32 *)ptr); + ptr += sizeof(u32); + bytes -= sizeof(u32); + } + + if (bytes > 0) { + u32 rem = siprand_u32(state); + + do { + *ptr++ = (u8)rem; + rem >>= BITS_PER_BYTE; + } while (--bytes > 0); + } + put_cpu_ptr(&net_rand_state); +} +EXPORT_SYMBOL(prandom_bytes); + +/** + * prandom_seed - add entropy to pseudo random number generator + * @entropy: entropy value + * + * Add some additional seed material to the prandom pool. + * The "entropy" is actually our IP address (the only caller is + * the network code), not for unpredictability, but to ensure that + * different machines are initialized differently. + */ +void prandom_seed(u32 entropy) +{ + int i; + + add_device_randomness(&entropy, sizeof(entropy)); + + for_each_possible_cpu(i) { + struct siprand_state *state = per_cpu_ptr(&net_rand_state, i); + unsigned long v0 = state->v0, v1 = state->v1; + unsigned long v2 = state->v2, v3 = state->v3; + + do { + v3 ^= entropy; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= entropy; + } while (unlikely(!v0 || !v1 || !v2 || !v3)); + + WRITE_ONCE(state->v0, v0); + WRITE_ONCE(state->v1, v1); + WRITE_ONCE(state->v2, v2); + WRITE_ONCE(state->v3, v3); + } +} +EXPORT_SYMBOL(prandom_seed); + +/* + * Generate some initially weak seeding values to allow + * the prandom_u32() engine to be started. + */ +static int __init prandom_init_early(void) +{ + int i; + unsigned long v0, v1, v2, v3; + + if (!arch_get_random_long(&v0)) + v0 = jiffies; + if (!arch_get_random_long(&v1)) + v1 = random_get_entropy(); + v2 = v0 ^ PRND_K0; + v3 = v1 ^ PRND_K1; + + for_each_possible_cpu(i) { + struct siprand_state *state; + + v3 ^= i; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= i; + + state = per_cpu_ptr(&net_rand_state, i); + state->v0 = v0; state->v1 = v1; + state->v2 = v2; state->v3 = v3; + } + + return 0; +} +core_initcall(prandom_init_early); + + +/* Stronger reseeding when available, and periodically thereafter. */ +static void prandom_reseed(struct timer_list *unused); + +static DEFINE_TIMER(seed_timer, prandom_reseed); + +static void prandom_reseed(struct timer_list *unused) +{ + unsigned long expires; + int i; + + /* + * Reinitialize each CPU's PRNG with 128 bits of key. + * No locking on the CPUs, but then somewhat random results are, + * well, expected. + */ + for_each_possible_cpu(i) { + struct siprand_state *state; + unsigned long v0 = get_random_long(), v2 = v0 ^ PRND_K0; + unsigned long v1 = get_random_long(), v3 = v1 ^ PRND_K1; +#if BITS_PER_LONG == 32 + int j; + + /* + * On 32-bit machines, hash in two extra words to + * approximate 128-bit key length. Not that the hash + * has that much security, but this prevents a trivial + * 64-bit brute force. 
+ */ + for (j = 0; j < 2; j++) { + unsigned long m = get_random_long(); + + v3 ^= m; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= m; + } +#endif + /* + * Probably impossible in practice, but there is a + * theoretical risk that a race between this reseeding + * and the target CPU writing its state back could + * create the all-zero SipHash fixed point. + * + * To ensure that never happens, ensure the state + * we write contains no zero words. + */ + state = per_cpu_ptr(&net_rand_state, i); + WRITE_ONCE(state->v0, v0 ? v0 : -1ul); + WRITE_ONCE(state->v1, v1 ? v1 : -1ul); + WRITE_ONCE(state->v2, v2 ? v2 : -1ul); + WRITE_ONCE(state->v3, v3 ? v3 : -1ul); + } + + /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ + expires = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ)); + mod_timer(&seed_timer, expires); +} + +/* + * The random ready callback can be called from almost any interrupt. + * To avoid worrying about whether it's safe to delay that interrupt + * long enough to seed all CPUs, just schedule an immediate timer event. + */ +static void prandom_timer_start(struct random_ready_callback *unused) +{ + mod_timer(&seed_timer, jiffies); +} + +#ifdef CONFIG_RANDOM32_SELFTEST +/* Principle: True 32-bit random numbers will all have 16 differing bits on + * average. For each 32-bit number, there are 601M numbers differing by 16 + * bits, and 89% of the numbers differ by at least 12 bits. Note that more + * than 16 differing bits also implies a correlation with inverted bits. Thus + * we take 1024 random numbers and compare each of them to the other ones, + * counting the deviation of correlated bits to 16. Constants report 32, + * counters 32-log2(TEST_SIZE), and pure randoms, around 6 or lower. With the + * u32 total, TEST_SIZE may be as large as 4096 samples. + */ +#define TEST_SIZE 1024 +static int __init prandom32_state_selftest(void) +{ + unsigned int x, y, bits, samples; + u32 xor, flip; + u32 total; + u32 *data; + + data = kmalloc(sizeof(*data) * TEST_SIZE, GFP_KERNEL); + if (!data) + return 0; + + for (samples = 0; samples < TEST_SIZE; samples++) + data[samples] = prandom_u32(); + + flip = total = 0; + for (x = 0; x < samples; x++) { + for (y = 0; y < samples; y++) { + if (x == y) + continue; + xor = data[x] ^ data[y]; + flip |= xor; + bits = hweight32(xor); + total += (bits - 16) * (bits - 16); + } + } + + /* We'll return the average deviation as 2*sqrt(corr/samples), which + * is also sqrt(4*corr/samples) which provides a better resolution. + */ + bits = int_sqrt(total / (samples * (samples - 1)) * 4); + if (bits > 6) + pr_warn("prandom32: self test failed (at least %u bits" + " correlated), fixed_mask=%#x fixed_value=%#x\n", + bits, ~flip, data[0] & ~flip); + else + pr_info("prandom32: self test passed (less than %u bits" + " correlated)\n", + bits + 1); + kfree(data); + return 0; +} +core_initcall(prandom32_state_selftest); +#endif /* CONFIG_RANDOM32_SELFTEST */
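As a concrete illustration of the two-rounds-per-word construction described in the comments above, here is a minimal userspace model. The kernel's PRND_SIPROUND macro and PRND_K0/PRND_K1 key constants live in include/linux/prandom.h, which is not part of this diff, so this sketch substitutes the round function from the public SipHash reference and an arbitrary hard-coded starting state; it illustrates the construction only, not the kernel implementation.

#include <stdint.h>
#include <stdio.h>

#define ROTL64(x, b) (((x) << (b)) | ((x) >> (64 - (b))))

/* One SipHash round, as in the public SipHash reference implementation. */
#define SIPROUND(v0, v1, v2, v3) do { \
	(v0) += (v1); (v1) = ROTL64(v1, 13); (v1) ^= (v0); (v0) = ROTL64(v0, 32); \
	(v2) += (v3); (v3) = ROTL64(v3, 16); (v3) ^= (v2); \
	(v0) += (v3); (v3) = ROTL64(v3, 21); (v3) ^= (v0); \
	(v2) += (v1); (v1) = ROTL64(v1, 17); (v1) ^= (v2); (v2) = ROTL64(v2, 32); \
} while (0)

struct sipstate { uint64_t v0, v1, v2, v3; };

/* Two rounds per output word, with the v1 + v3 finalization described above. */
static uint32_t model_siprand_u32(struct sipstate *s, uint64_t noise)
{
	uint64_t v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3;

	v3 ^= noise;
	SIPROUND(v0, v1, v2, v3);
	SIPROUND(v0, v1, v2, v3);
	v0 ^= noise;
	s->v0 = v0; s->v1 = v1; s->v2 = v2; s->v3 = v3;
	return (uint32_t)(v1 + v3);
}

int main(void)
{
	/* Arbitrary nonzero state standing in for the kernel's key setup. */
	struct sipstate s = { 1, 2, 3, 4 };
	int i;

	for (i = 0; i < 4; i++)
		printf("%08x\n", model_siprand_u32(&s, 0));
	return 0;
}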
+ +/* + * Start periodic full reseeding as soon as strong + * random numbers are available. + */ +static int __init prandom_init_late(void) +{ + static struct random_ready_callback random_ready = { + .func = prandom_timer_start + }; + int ret = add_random_ready_callback(&random_ready); + + if (ret == -EALREADY) { + prandom_timer_start(&random_ready); + ret = 0; + } + return ret; +} +late_initcall(prandom_init_late); diff --git a/net/core/dev.c b/net/core/dev.c index 9499a414d67e..82dc6b48e45f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -145,6 +145,7 @@ #include <linux/indirect_call_wrapper.h> #include <net/devlink.h> #include <linux/pm_runtime.h> +#include <linux/prandom.h> #include "net-sysfs.h" @@ -3558,6 +3559,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, dev_queue_xmit_nit(skb, dev); len = skb->len; + PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies); trace_net_dev_start_xmit(skb, dev); rc = netdev_start_xmit(skb, dev, txq, more); trace_net_dev_xmit(skb, rc, dev, len); @@ -4130,6 +4132,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) if (!skb) goto out; + PRANDOM_ADD_NOISE(skb, dev, txq, jiffies); HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { @@ -4195,6 +4198,7 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) skb_set_queue_mapping(skb, queue_id); txq = skb_get_tx_queue(dev, skb); + PRANDOM_ADD_NOISE(skb, dev, txq, jiffies); local_bh_disable(); diff --git a/security/safesetid/lsm.c b/security/safesetid/lsm.c index 7760019ad35d..8a176b6adbe5 100644 --- a/security/safesetid/lsm.c +++ b/security/safesetid/lsm.c @@ -24,20 +24,36 @@ /* Flag indicating whether initialization completed */ int safesetid_initialized; -struct setuid_ruleset __rcu *safesetid_setuid_rules; +struct setid_ruleset __rcu *safesetid_setuid_rules; +struct setid_ruleset __rcu *safesetid_setgid_rules; + /* Compute a decision for a transition from @src to @dst under @policy. */ -enum sid_policy_type _setuid_policy_lookup(struct setuid_ruleset *policy, - kuid_t src, kuid_t dst) +enum sid_policy_type _setid_policy_lookup(struct setid_ruleset *policy, + kid_t src, kid_t dst) { - struct setuid_rule *rule; + struct setid_rule *rule; enum sid_policy_type result = SIDPOL_DEFAULT; - hash_for_each_possible(policy->rules, rule, next, __kuid_val(src)) { - if (!uid_eq(rule->src_uid, src)) - continue; - if (uid_eq(rule->dst_uid, dst)) - return SIDPOL_ALLOWED; + if (policy->type == UID) { + hash_for_each_possible(policy->rules, rule, next, __kuid_val(src.uid)) { + if (!uid_eq(rule->src_id.uid, src.uid)) + continue; + if (uid_eq(rule->dst_id.uid, dst.uid)) + return SIDPOL_ALLOWED; + result = SIDPOL_CONSTRAINED; + } + } else if (policy->type == GID) { + hash_for_each_possible(policy->rules, rule, next, __kgid_val(src.gid)) { + if (!gid_eq(rule->src_id.gid, src.gid)) + continue; + if (gid_eq(rule->dst_id.gid, dst.gid)) { + return SIDPOL_ALLOWED; + } + result = SIDPOL_CONSTRAINED; + } + } else { + /* Should not reach here, report the ID as constrained */ result = SIDPOL_CONSTRAINED; } return result;
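The three-way result computed above is the heart of the policy engine: SIDPOL_DEFAULT means no rule mentions the source ID at all, SIDPOL_CONSTRAINED means some rule does but none allows this destination, and SIDPOL_ALLOWED means an exact (src, dst) match exists. A standalone sketch of the same decision logic over a flat array (plain integers and a linear scan stand in for the kernel types and hashtable):

#include <stdio.h>

enum sid_policy_type { SIDPOL_DEFAULT, SIDPOL_CONSTRAINED, SIDPOL_ALLOWED };

struct rule { unsigned int src, dst; };

/* Mock of _setid_policy_lookup(): any rule with a matching source
 * constrains it; an exact (src, dst) match allows the transition. */
static enum sid_policy_type lookup(const struct rule *rules, int n,
				   unsigned int src, unsigned int dst)
{
	enum sid_policy_type result = SIDPOL_DEFAULT;
	int i;

	for (i = 0; i < n; i++) {
		if (rules[i].src != src)
			continue;
		if (rules[i].dst == dst)
			return SIDPOL_ALLOWED;
		result = SIDPOL_CONSTRAINED;
	}
	return result;
}

int main(void)
{
	const struct rule rules[] = { { 123, 456 } };

	printf("%d\n", lookup(rules, 1, 123, 456)); /* 2: allowed */
	printf("%d\n", lookup(rules, 1, 123, 789)); /* 1: constrained */
	printf("%d\n", lookup(rules, 1, 42, 789));  /* 0: no policy applies */
	return 0;
}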
@@ -47,15 +63,26 @@ enum sid_policy_type _setuid_policy_lookup(struct setuid_ruleset *policy, /* * Compute a decision for a transition from @src to @dst under the active * policy. */ -static enum sid_policy_type setuid_policy_lookup(kuid_t src, kuid_t dst) +static enum sid_policy_type setid_policy_lookup(kid_t src, kid_t dst, enum setid_type new_type) { enum sid_policy_type result = SIDPOL_DEFAULT; - struct setuid_ruleset *pol; + struct setid_ruleset *pol; rcu_read_lock(); - pol = rcu_dereference(safesetid_setuid_rules); - if (pol) - result = _setuid_policy_lookup(pol, src, dst); + if (new_type == UID) + pol = rcu_dereference(safesetid_setuid_rules); + else if (new_type == GID) + pol = rcu_dereference(safesetid_setgid_rules); + else { /* Should not reach here */ + result = SIDPOL_CONSTRAINED; + rcu_read_unlock(); + return result; + } + + if (pol) { + pol->type = new_type; + result = _setid_policy_lookup(pol, src, dst); + } rcu_read_unlock(); return result; } @@ -65,57 +92,101 @@ static int safesetid_security_capable(const struct cred *cred, int cap, unsigned int opts) { - /* We're only interested in CAP_SETUID. */ - if (cap != CAP_SETUID) + /* We're only interested in CAP_SETUID and CAP_SETGID. */ + if (cap != CAP_SETUID && cap != CAP_SETGID) return 0; /* - * If CAP_SETUID is currently used for a set*uid() syscall, we want to + * If CAP_SET{U/G}ID is currently used for a setid() syscall, we want to * let it go through here; the real security check happens later, in the - * task_fix_setuid hook. + * task_fix_set{u/g}id hook. + * + * NOTE: + * Until we add support for restricting setgroups() calls, GID security + * policies offer no meaningful security since we always return 0 here + * when called from within the setgroups() syscall and there is no + * additional hook later on to enforce security policies for setgroups(). */ if ((opts & CAP_OPT_INSETID) != 0) return 0; - /* - * If no policy applies to this task, allow the use of CAP_SETUID for - * other purposes. - */ - if (setuid_policy_lookup(cred->uid, INVALID_UID) == SIDPOL_DEFAULT) + switch (cap) { + case CAP_SETUID: + /* + * If no policy applies to this task, allow the use of CAP_SETUID for + * other purposes. + */ + if (setid_policy_lookup((kid_t){.uid = cred->uid}, INVALID_ID, UID) == SIDPOL_DEFAULT) + return 0; + /* + * Reject use of CAP_SETUID for functionality other than calling + * set*uid() (e.g. setting up userns uid mappings). + */ + pr_warn("Operation requires CAP_SETUID, which is not available to UID %u for operations besides approved set*uid transitions\n", + __kuid_val(cred->uid)); + return -EPERM; + break; + case CAP_SETGID: + /* + * If no policy applies to this task, allow the use of CAP_SETGID for + * other purposes. + */ + if (setid_policy_lookup((kid_t){.gid = cred->gid}, INVALID_ID, GID) == SIDPOL_DEFAULT) + return 0; + /* + * Reject use of CAP_SETGID for functionality other than calling + * set*gid() (e.g. setting up userns gid mappings). + */ + pr_warn("Operation requires CAP_SETGID, which is not available to GID %u for operations besides approved set*gid transitions\n", + __kgid_val(cred->gid)); + return -EPERM; + break; + default: + /* Error, the only capabilities we check for are CAP_SETUID and CAP_SETGID */ return 0; - - /* - * Reject use of CAP_SETUID for functionality other than calling - * set*uid() (e.g. setting up userns uid mappings). - */ - pr_warn("Operation requires CAP_SETUID, which is not available to UID %u for operations besides approved set*uid transitions\n", - __kuid_val(cred->uid)); - return -EPERM; + break; + } + return 0; }
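The NOTE above can be made concrete from userspace. A hypothetical demo, assuming a kernel with this LSM enabled, a GID allowlist loaded for the caller's real GID, and a caller holding CAP_SETGID; group 1234 is an arbitrary value outside any allowlist. Because this hook returns 0 for in-setid uses and no later hook covers setgroups(), the call is expected to succeed anyway:

#include <grp.h>
#include <stdio.h>

int main(void)
{
	gid_t groups[] = { 1234 };	/* hypothetical, not in any allowlist */

	if (setgroups(1, groups))
		perror("setgroups");
	else
		puts("setgroups succeeded despite the GID allowlist");
	return 0;
}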
/* * Check whether a caller with old credentials @old is allowed to switch to - * credentials that contain @new_uid. + * credentials that contain @new_id. */ -static bool uid_permitted_for_cred(const struct cred *old, kuid_t new_uid) +static bool id_permitted_for_cred(const struct cred *old, kid_t new_id, enum setid_type new_type) { bool permitted; - /* If our old creds already had this UID in it, it's fine. */ - if (uid_eq(new_uid, old->uid) || uid_eq(new_uid, old->euid) || - uid_eq(new_uid, old->suid)) - return true; + /* If our old creds already had this ID in it, it's fine. */ + if (new_type == UID) { + if (uid_eq(new_id.uid, old->uid) || uid_eq(new_id.uid, old->euid) || + uid_eq(new_id.uid, old->suid)) + return true; + } else if (new_type == GID) { + if (gid_eq(new_id.gid, old->gid) || gid_eq(new_id.gid, old->egid) || + gid_eq(new_id.gid, old->sgid)) + return true; + } else /* Error, new_type is an invalid type */ + return false; /* * Transitions to new UIDs require a check against the policy of the old * RUID. */ permitted = - setuid_policy_lookup(old->uid, new_uid) != SIDPOL_CONSTRAINED; + setid_policy_lookup(new_type == UID ? (kid_t){.uid = old->uid} : + (kid_t){.gid = old->gid}, + new_id, new_type) != SIDPOL_CONSTRAINED; + if (!permitted) { - pr_warn("UID transition ((%d,%d,%d) -> %d) blocked\n", - __kuid_val(old->uid), __kuid_val(old->euid), - __kuid_val(old->suid), __kuid_val(new_uid)); + if (new_type == UID) { + pr_warn("UID transition ((%d,%d,%d) -> %d) blocked\n", + __kuid_val(old->uid), __kuid_val(old->euid), + __kuid_val(old->suid), __kuid_val(new_id.uid)); + } else if (new_type == GID) { + pr_warn("GID transition ((%d,%d,%d) -> %d) blocked\n", + __kgid_val(old->gid), __kgid_val(old->egid), + __kgid_val(old->sgid), __kgid_val(new_id.gid)); + } else /* Error, new_type is an invalid type */ + return false; } return permitted; } @@ -131,18 +202,42 @@ static int safesetid_task_fix_setuid(struct cred *new, { /* Do nothing if there are no setuid restrictions for our old RUID. */ - if (setuid_policy_lookup(old->uid, INVALID_UID) == SIDPOL_DEFAULT) + if (setid_policy_lookup((kid_t){.uid = old->uid}, INVALID_ID, UID) == SIDPOL_DEFAULT) + return 0; + + if (id_permitted_for_cred(old, (kid_t){.uid = new->uid}, UID) && + id_permitted_for_cred(old, (kid_t){.uid = new->euid}, UID) && + id_permitted_for_cred(old, (kid_t){.uid = new->suid}, UID) && + id_permitted_for_cred(old, (kid_t){.uid = new->fsuid}, UID)) + return 0; + + /* + * Kill this process to avoid potential security vulnerabilities + * that could arise from a missing allowlist entry preventing a + * privileged process from dropping to a lesser-privileged one. + */ + force_sig(SIGKILL); + return -EACCES; +} + +static int safesetid_task_fix_setgid(struct cred *new, + const struct cred *old, + int flags) +{ + + /* Do nothing if there are no setgid restrictions for our old RGID. */ + if (setid_policy_lookup((kid_t){.gid = old->gid}, INVALID_ID, GID) == SIDPOL_DEFAULT) return 0; - if (uid_permitted_for_cred(old, new->uid) && - uid_permitted_for_cred(old, new->euid) && - uid_permitted_for_cred(old, new->suid) && - uid_permitted_for_cred(old, new->fsuid)) + if (id_permitted_for_cred(old, (kid_t){.gid = new->gid}, GID) && + id_permitted_for_cred(old, (kid_t){.gid = new->egid}, GID) && + id_permitted_for_cred(old, (kid_t){.gid = new->sgid}, GID) && + id_permitted_for_cred(old, (kid_t){.gid = new->fsgid}, GID)) return 0; /* * Kill this process to avoid potential security vulnerabilities - * that could arise from a missing whitelist entry preventing a + * that could arise from a missing allowlist entry preventing a * privileged process from dropping to a lesser-privileged one. */ force_sig(SIGKILL);
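The kill-on-violation behavior is observable from userspace. A hypothetical demo, assuming it runs with CAP_SETUID as a UID that has some allowlist entry (so the policy applies) and that UID 789 is not an allowed destination; the child dies inside setuid() rather than seeing an error return:

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int status;
	pid_t pid = fork();

	if (pid == 0) {
		setuid(789);	/* hypothetical UID outside the allowlist */
		puts("not reached if the transition was blocked");
		return 0;
	}
	waitpid(pid, &status, 0);
	if (WIFSIGNALED(status))
		printf("child killed by signal %d\n", WTERMSIG(status));
	return 0;
}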
@@ -151,6 +246,7 @@ static int safesetid_task_fix_setuid(struct cred *new, static struct security_hook_list safesetid_security_hooks[] = { LSM_HOOK_INIT(task_fix_setuid, safesetid_task_fix_setuid), + LSM_HOOK_INIT(task_fix_setgid, safesetid_task_fix_setgid), LSM_HOOK_INIT(capable, safesetid_security_capable) }; diff --git a/security/safesetid/lsm.h b/security/safesetid/lsm.h index db6d16e6bbc3..bde8c43a3767 100644 --- a/security/safesetid/lsm.h +++ b/security/safesetid/lsm.h @@ -27,27 +27,47 @@ enum sid_policy_type { SIDPOL_ALLOWED /* target ID explicitly allowed */ }; +typedef union { + kuid_t uid; + kgid_t gid; +} kid_t; + +enum setid_type { + UID, + GID +}; + /* - * Hash table entry to store safesetid policy signifying that 'src_uid' - * can setuid to 'dst_uid'. + * Hash table entry to store safesetid policy signifying that 'src_id' + * can set*id to 'dst_id'. */ -struct setuid_rule { +struct setid_rule { struct hlist_node next; - kuid_t src_uid; - kuid_t dst_uid; + kid_t src_id; + kid_t dst_id; + + /* Flag to signal if rule is for UIDs or GIDs */ + enum setid_type type; }; #define SETID_HASH_BITS 8 /* 256 buckets in hash table */ -struct setuid_ruleset { +/* Extension of INVALID_UID/INVALID_GID for kid_t type */ +#define INVALID_ID (kid_t){.uid = INVALID_UID} + +struct setid_ruleset { DECLARE_HASHTABLE(rules, SETID_HASH_BITS); char *policy_str; struct rcu_head rcu; + + /* Flag to signal if ruleset is for UIDs or GIDs */ + enum setid_type type; }; -enum sid_policy_type _setuid_policy_lookup(struct setuid_ruleset *policy, - kuid_t src, kuid_t dst); +enum sid_policy_type _setid_policy_lookup(struct setid_ruleset *policy, + kid_t src, kid_t dst); -extern struct setuid_ruleset __rcu *safesetid_setuid_rules; +extern struct setid_ruleset __rcu *safesetid_setuid_rules; +extern struct setid_ruleset __rcu *safesetid_setgid_rules; #endif /* _SAFESETID_H */
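The kid_t union above is only meaningful together with a setid_type discriminator, which is why every rule and ruleset now carries one. A standalone sketch of the tagged-union pattern (plain integers stand in for kuid_t/kgid_t, which are kernel-internal types):

#include <stdbool.h>
#include <stdio.h>

typedef union {
	unsigned int uid;	/* stands in for kuid_t */
	unsigned int gid;	/* stands in for kgid_t */
} kid_mock_t;

enum setid_type_mock { MOCK_UID, MOCK_GID };

/* Comparisons must dispatch on the tag, never on the raw union bits. */
static bool id_eq(kid_mock_t a, kid_mock_t b, enum setid_type_mock t)
{
	return t == MOCK_UID ? a.uid == b.uid : a.gid == b.gid;
}

int main(void)
{
	kid_mock_t x = { .uid = 123 };
	kid_mock_t y = { .uid = 123 };

	printf("%d\n", id_eq(x, y, MOCK_UID));	/* 1: same UID */
	return 0;
}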
diff --git a/security/safesetid/securityfs.c b/security/safesetid/securityfs.c index f8bc574cea9c..25310468bcdd 100644 --- a/security/safesetid/securityfs.c +++ b/security/safesetid/securityfs.c @@ -19,22 +19,23 @@ #include "lsm.h" -static DEFINE_MUTEX(policy_update_lock); +static DEFINE_MUTEX(uid_policy_update_lock); +static DEFINE_MUTEX(gid_policy_update_lock); /* - * In the case the input buffer contains one or more invalid UIDs, the kuid_t + * In the case the input buffer contains one or more invalid IDs, the kid_t * variables pointed to by @parent and @child will get updated but this * function will return an error. * Contents of @buf may be modified. */ static int parse_policy_line(struct file *file, char *buf, - struct setuid_rule *rule) + struct setid_rule *rule) { char *child_str; int ret; u32 parsed_parent, parsed_child; - /* Format of |buf| string should be <UID>:<UID>. */ + /* Format of |buf| string should be <UID>:<UID> or <GID>:<GID> */ child_str = strchr(buf, ':'); if (child_str == NULL) return -EINVAL; @@ -49,20 +50,29 @@ static int parse_policy_line(struct file *file, char *buf, if (ret) return ret; - rule->src_uid = make_kuid(file->f_cred->user_ns, parsed_parent); - rule->dst_uid = make_kuid(file->f_cred->user_ns, parsed_child); - if (!uid_valid(rule->src_uid) || !uid_valid(rule->dst_uid)) + if (rule->type == UID) { + rule->src_id.uid = make_kuid(file->f_cred->user_ns, parsed_parent); + rule->dst_id.uid = make_kuid(file->f_cred->user_ns, parsed_child); + if (!uid_valid(rule->src_id.uid) || !uid_valid(rule->dst_id.uid)) + return -EINVAL; + } else if (rule->type == GID) { + rule->src_id.gid = make_kgid(file->f_cred->user_ns, parsed_parent); + rule->dst_id.gid = make_kgid(file->f_cred->user_ns, parsed_child); + if (!gid_valid(rule->src_id.gid) || !gid_valid(rule->dst_id.gid)) + return -EINVAL; + } else { + /* Error, rule->type is an invalid type */ return -EINVAL; - + } return 0; } static void __release_ruleset(struct rcu_head *rcu) { - struct setuid_ruleset *pol = - container_of(rcu, struct setuid_ruleset, rcu); + struct setid_ruleset *pol = + container_of(rcu, struct setid_ruleset, rcu); int bucket; - struct setuid_rule *rule; + struct setid_rule *rule; struct hlist_node *tmp; hash_for_each_safe(pol->rules, bucket, tmp, rule, next) @@ -71,36 +81,55 @@ static void __release_ruleset(struct rcu_head *rcu) kfree(pol); } -static void release_ruleset(struct setuid_ruleset *pol) -{ +static void release_ruleset(struct setid_ruleset *pol) +{ call_rcu(&pol->rcu, __release_ruleset); } -static void insert_rule(struct setuid_ruleset *pol, struct setuid_rule *rule) +static void insert_rule(struct setid_ruleset *pol, struct setid_rule *rule) { - hash_add(pol->rules, &rule->next, __kuid_val(rule->src_uid)); + if (pol->type == UID) + hash_add(pol->rules, &rule->next, __kuid_val(rule->src_id.uid)); + else if (pol->type == GID) + hash_add(pol->rules, &rule->next, __kgid_val(rule->src_id.gid)); + else /* Error, pol->type is neither UID nor GID */ + return; }
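To see why the verify_ruleset() pass just below exists, consider a UID ruleset loaded as the single policy line

    123:456

UID 123 is now constrained, but UID 456, reachable from it, has no rules of its own and is therefore transitively unconstrained. verify_ruleset() warns about this insecure shape and fixes it up by inserting the equivalent of an explicit

    456:456

rule, pinning 456 to itself, and the repaired ruleset is what ends up installed.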
-static int verify_ruleset(struct setuid_ruleset *pol) +static int verify_ruleset(struct setid_ruleset *pol) { int bucket; - struct setuid_rule *rule, *nrule; + struct setid_rule *rule, *nrule; int res = 0; hash_for_each(pol->rules, bucket, rule, next) { - if (_setuid_policy_lookup(pol, rule->dst_uid, INVALID_UID) == - SIDPOL_DEFAULT) { - pr_warn("insecure policy detected: uid %d is constrained but transitively unconstrained through uid %d\n", - __kuid_val(rule->src_uid), - __kuid_val(rule->dst_uid)); + if (_setid_policy_lookup(pol, rule->dst_id, INVALID_ID) == SIDPOL_DEFAULT) { + if (pol->type == UID) { + pr_warn("insecure policy detected: uid %d is constrained but transitively unconstrained through uid %d\n", + __kuid_val(rule->src_id.uid), + __kuid_val(rule->dst_id.uid)); + } else if (pol->type == GID) { + pr_warn("insecure policy detected: gid %d is constrained but transitively unconstrained through gid %d\n", + __kgid_val(rule->src_id.gid), + __kgid_val(rule->dst_id.gid)); + } else { /* pol->type is an invalid type */ + res = -EINVAL; + return res; + } res = -EINVAL; /* fix it up */ - nrule = kmalloc(sizeof(struct setuid_rule), GFP_KERNEL); + nrule = kmalloc(sizeof(struct setid_rule), GFP_KERNEL); if (!nrule) return -ENOMEM; - nrule->src_uid = rule->dst_uid; - nrule->dst_uid = rule->dst_uid; + if (pol->type == UID) { + nrule->src_id.uid = rule->dst_id.uid; + nrule->dst_id.uid = rule->dst_id.uid; + nrule->type = UID; + } else { /* pol->type must be GID if we've made it here */ + nrule->src_id.gid = rule->dst_id.gid; + nrule->dst_id.gid = rule->dst_id.gid; + nrule->type = GID; + } insert_rule(pol, nrule); } } @@ -108,16 +137,17 @@ static int verify_ruleset(struct setuid_ruleset *pol) } static ssize_t handle_policy_update(struct file *file, - const char __user *ubuf, size_t len) + const char __user *ubuf, size_t len, enum setid_type policy_type) { - struct setuid_ruleset *pol; + struct setid_ruleset *pol; char *buf, *p, *end; int err; - pol = kmalloc(sizeof(struct setuid_ruleset), GFP_KERNEL); + pol = kmalloc(sizeof(struct setid_ruleset), GFP_KERNEL); if (!pol) return -ENOMEM; pol->policy_str = NULL; + pol->type = policy_type; hash_init(pol->rules); p = buf = memdup_user_nul(ubuf, len); @@ -133,7 +163,7 @@ static ssize_t handle_policy_update(struct file *file, /* policy lines, including the last one, end with \n */ while (*p != '\0') { - struct setuid_rule *rule; + struct setid_rule *rule; end = strchr(p, '\n'); if (end == NULL) { @@ -142,18 +172,18 @@ static ssize_t handle_policy_update(struct file *file, *end = '\0'; - rule = kmalloc(sizeof(struct setuid_rule), GFP_KERNEL); + rule = kmalloc(sizeof(struct setid_rule), GFP_KERNEL); if (!rule) { err = -ENOMEM; goto out_free_buf; } + rule->type = policy_type; err = parse_policy_line(file, p, rule); if (err) goto out_free_rule; - if (_setuid_policy_lookup(pol, rule->src_uid, rule->dst_uid) == - SIDPOL_ALLOWED) { + if (_setid_policy_lookup(pol, rule->src_id, rule->dst_id) == SIDPOL_ALLOWED) { pr_warn("bad policy: duplicate entry\n"); err = -EEXIST; goto out_free_rule;
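From userspace, the whole update path above is driven by a single write, and each write replaces the previous ruleset wholesale rather than appending to it. A hypothetical round-trip, assuming securityfs is mounted at /sys/kernel/security and the caller holds CAP_MAC_ADMIN:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/security/safesetid/uid_allowlist_policy";
	const char policy[] = "123:456\n";	/* whole ruleset, replaced atomically */
	char buf[256];
	ssize_t n;
	int fd;

	fd = open(path, O_WRONLY);
	if (fd < 0 || write(fd, policy, strlen(policy)) < 0)
		perror("policy write");
	if (fd >= 0)
		close(fd);

	fd = open(path, O_RDONLY);
	if (fd >= 0 && (n = read(fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);	/* echoes back pol->policy_str */
	}
	if (fd >= 0)
		close(fd);
	return 0;
}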
@@ -178,21 +208,31 @@ out_free_rule: /* * What we really want here is an xchg() wrapper for RCU, but since that * doesn't currently exist, just use a spinlock for now. */ - mutex_lock(&policy_update_lock); - pol = rcu_replace_pointer(safesetid_setuid_rules, pol, - lockdep_is_held(&policy_update_lock)); - mutex_unlock(&policy_update_lock); + if (policy_type == UID) { + mutex_lock(&uid_policy_update_lock); + pol = rcu_replace_pointer(safesetid_setuid_rules, pol, + lockdep_is_held(&uid_policy_update_lock)); + mutex_unlock(&uid_policy_update_lock); + } else if (policy_type == GID) { + mutex_lock(&gid_policy_update_lock); + pol = rcu_replace_pointer(safesetid_setgid_rules, pol, + lockdep_is_held(&gid_policy_update_lock)); + mutex_unlock(&gid_policy_update_lock); + } else { + /* Error, policy type is neither UID nor GID */ + pr_warn("error: bad policy type\n"); + } err = len; out_free_buf: kfree(buf); out_free_pol: if (pol) - release_ruleset(pol); + release_ruleset(pol); return err; } -static ssize_t safesetid_file_write(struct file *file, +static ssize_t safesetid_uid_file_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) @@ -203,38 +243,74 @@ static ssize_t safesetid_file_write(struct file *file, if (*ppos != 0) return -EINVAL; - return handle_policy_update(file, buf, len); + return handle_policy_update(file, buf, len, UID); +} + +static ssize_t safesetid_gid_file_write(struct file *file, + const char __user *buf, + size_t len, + loff_t *ppos) +{ + if (!file_ns_capable(file, &init_user_ns, CAP_MAC_ADMIN)) + return -EPERM; + + if (*ppos != 0) + return -EINVAL; + + return handle_policy_update(file, buf, len, GID); } static ssize_t safesetid_file_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) + size_t len, loff_t *ppos, struct mutex *policy_update_lock, struct setid_ruleset __rcu *ruleset) { ssize_t res = 0; - struct setuid_ruleset *pol; + struct setid_ruleset *pol; const char *kbuf; - mutex_lock(&policy_update_lock); - pol = rcu_dereference_protected(safesetid_setuid_rules, - lockdep_is_held(&policy_update_lock)); + mutex_lock(policy_update_lock); + pol = rcu_dereference_protected(ruleset, lockdep_is_held(policy_update_lock)); if (pol) { kbuf = pol->policy_str; res = simple_read_from_buffer(buf, len, ppos, kbuf, strlen(kbuf)); } - mutex_unlock(&policy_update_lock); + mutex_unlock(policy_update_lock); + return res; } -static const struct file_operations safesetid_file_fops = { - .read = safesetid_file_read, - .write = safesetid_file_write, +static ssize_t safesetid_uid_file_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + return safesetid_file_read(file, buf, len, ppos, + &uid_policy_update_lock, safesetid_setuid_rules); +} + +static ssize_t safesetid_gid_file_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + return safesetid_file_read(file, buf, len, ppos, + &gid_policy_update_lock, safesetid_setgid_rules); +} + +static const struct file_operations safesetid_uid_file_fops = { + .read = safesetid_uid_file_read, + .write = safesetid_uid_file_write, +}; + +static const struct file_operations safesetid_gid_file_fops = { + .read = safesetid_gid_file_read, + .write = safesetid_gid_file_write, }; static int __init safesetid_init_securityfs(void) { int ret; struct dentry *policy_dir; - struct dentry *policy_file; + struct dentry *uid_policy_file; + struct dentry *gid_policy_file; if (!safesetid_initialized) return 0; @@ -245,13 +321,21 @@ static int __init safesetid_init_securityfs(void) goto error; } - policy_file = securityfs_create_file("whitelist_policy", 0600, - policy_dir, NULL, &safesetid_file_fops); - if (IS_ERR(policy_file)) { - ret = PTR_ERR(policy_file); goto error; } + uid_policy_file = securityfs_create_file("uid_allowlist_policy", 0600, + policy_dir, NULL, &safesetid_uid_file_fops); + if (IS_ERR(uid_policy_file)) { + ret = PTR_ERR(uid_policy_file); + goto error; + } + gid_policy_file = securityfs_create_file("gid_allowlist_policy", 0600, + policy_dir, NULL, &safesetid_gid_file_fops); + if (IS_ERR(gid_policy_file)) { + ret = PTR_ERR(gid_policy_file); + goto error; + } + + return 0; error: