diff options
Diffstat (limited to 'tools/testing')
64 files changed, 4009 insertions, 318 deletions
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 6e4eb2fc2d1e..0c8b61f8398e 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1880,6 +1880,7 @@ sub get_grub_index { sub wait_for_input { my ($fp, $time) = @_; + my $start_time; my $rin; my $rout; my $nr; @@ -1895,17 +1896,22 @@ sub wait_for_input vec($rin, fileno($fp), 1) = 1; vec($rin, fileno(\*STDIN), 1) = 1; + $start_time = time; + while (1) { $nr = select($rout=$rin, undef, undef, $time); - if ($nr <= 0) { - return undef; - } + last if ($nr <= 0); # copy data from stdin to the console if (vec($rout, fileno(\*STDIN), 1) == 1) { - sysread(\*STDIN, $buf, 1000); - syswrite($fp, $buf, 1000); + $nr = sysread(\*STDIN, $buf, 1000); + syswrite($fp, $buf, $nr) if ($nr > 0); + } + + # The timeout is based on time waiting for the fp data + if (vec($rout, fileno($fp), 1) != 1) { + last if (defined($time) && (time - $start_time > $time)); next; } @@ -1917,12 +1923,11 @@ sub wait_for_input last if ($ch eq "\n"); } - if (!length($line)) { - return undef; - } + last if (!length($line)); return $line; } + return undef; } sub reboot_to { diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 405212be044a..d870520da68b 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -28,7 +28,10 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o obj-$(CONFIG_ACPI_NFIT) += nfit.o -obj-$(CONFIG_DEV_DAX) += dax.o +ifeq ($(CONFIG_DAX),m) +obj-$(CONFIG_DAX) += dax.o +endif +obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o nfit-y := $(ACPI_SRC)/core.o @@ -48,9 +51,13 @@ nd_blk-y += config_check.o nd_e820-y := $(NVDIMM_SRC)/e820.o nd_e820-y += config_check.o -dax-y := $(DAX_SRC)/dax.o +dax-y := $(DAX_SRC)/super.o dax-y += config_check.o +device_dax-y := $(DAX_SRC)/device.o +device_dax-y += dax-dev.o +device_dax-y += config_check.o + dax_pmem-y := $(DAX_SRC)/pmem.o dax_pmem-y += config_check.o diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c new file mode 100644 index 000000000000..36ee3d8797c3 --- /dev/null +++ b/tools/testing/nvdimm/dax-dev.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include "test/nfit_test.h" +#include <linux/mm.h> +#include "../../../drivers/dax/dax-private.h" + +phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, + unsigned long size) +{ + struct resource *res; + phys_addr_t addr; + int i; + + for (i = 0; i < dev_dax->num_resources; i++) { + res = &dev_dax->res[i]; + addr = pgoff * PAGE_SIZE + res->start; + if (addr >= res->start && addr <= res->end) + break; + pgoff -= PHYS_PFN(resource_size(res)); + } + + if (i < dev_dax->num_resources) { + res = &dev_dax->res[i]; + if (addr + size - 1 <= res->end) { + if (get_nfit_res(addr)) { + struct page *page; + + if (dev_dax->region->align > PAGE_SIZE) + return -1; + + page = vmalloc_to_page((void *)addr); + return PFN_PHYS(page_to_pfn(page)); + } else + return addr; + } + } + + return -1; +} diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c index c9b8c48f85fc..b53596ad601b 100644 --- a/tools/testing/nvdimm/pmem-dax.c +++ b/tools/testing/nvdimm/pmem-dax.c @@ -15,13 +15,13 @@ #include <pmem.h> #include <nd.h> -long pmem_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, pfn_t *pfn, long size) +long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, + long nr_pages, void **kaddr, pfn_t *pfn) { - struct pmem_device *pmem = bdev->bd_queue->queuedata; - resource_size_t offset = sector * 512 + pmem->data_offset; + resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; - if (unlikely(is_bad_pmem(&pmem->bb, sector, size))) + if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512, + PFN_PHYS(nr_pages)))) return -EIO; /* @@ -34,11 +34,10 @@ long pmem_direct_access(struct block_device *bdev, sector_t sector, *kaddr = pmem->virt_addr + offset; page = vmalloc_to_page(pmem->virt_addr + offset); *pfn = page_to_pfn_t(page); - dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent, - "%s: sector: %#llx pfn: %#lx\n", __func__, - (unsigned long long) sector, page_to_pfn(page)); + pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n", + __func__, pmem, pgoff, page_to_pfn(page)); - return PAGE_SIZE; + return 1; } *kaddr = pmem->virt_addr + offset; @@ -49,6 +48,6 @@ long pmem_direct_access(struct block_device *bdev, sector_t sector, * requested range. */ if (unlikely(pmem->bb.count)) - return size; - return pmem->size - pmem->pfn_pad - offset; + return nr_pages; + return PHYS_PFN(pmem->size - pmem->pfn_pad - offset); } diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 798f17655433..c2187178fb13 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -132,6 +132,7 @@ static u32 handle[] = { [3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1), [4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0), [5] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0), + [6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1), }; static unsigned long dimm_fail_cmd_flags[NUM_DCR]; @@ -728,8 +729,8 @@ static int nfit_test0_alloc(struct nfit_test *t) static int nfit_test1_alloc(struct nfit_test *t) { size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2 - + sizeof(struct acpi_nfit_memory_map) - + offsetof(struct acpi_nfit_control_region, window_size); + + sizeof(struct acpi_nfit_memory_map) * 2 + + offsetof(struct acpi_nfit_control_region, window_size) * 2; int i; t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); @@ -906,6 +907,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 2; + memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; /* mem-region2 (spa1, dimm0) */ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; @@ -921,6 +923,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; + memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; /* mem-region3 (spa1, dimm1) */ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3; @@ -951,6 +954,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; + memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; /* mem-region5 (spa1, dimm3) */ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5; @@ -1086,6 +1090,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; /* dcr-descriptor0: blk */ @@ -1384,6 +1389,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; /* mem-region16 (spa/bdw4, dimm4) */ memdev = nfit_buf + offset + @@ -1486,6 +1492,34 @@ static void nfit_test1_setup(struct nfit_test *t) dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; + offset += dcr->header.length; + memdev = nfit_buf + offset; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[6]; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 0; + memdev->region_index = 0+2; + memdev->region_size = SPA2_SIZE; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + memdev->flags = ACPI_NFIT_MEM_MAP_FAILED; + + /* dcr-descriptor1 */ + offset += sizeof(*memdev); + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); + dcr->region_index = 0+2; + dcr_common_init(dcr); + dcr->serial_number = ~handle[6]; + dcr->code = NFIT_FIC_BYTE; + dcr->windows = 0; + post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE); acpi_desc = &t->acpi_desc; @@ -1817,6 +1851,10 @@ static int nfit_test_probe(struct platform_device *pdev) if (rc) return rc; + rc = devm_add_action_or_reset(&pdev->dev, acpi_nfit_shutdown, acpi_desc); + if (rc) + return rc; + if (nfit_test->setup != nfit_test0_setup) return 0; @@ -1907,7 +1945,7 @@ static __init int nfit_test_init(void) case 1: nfit_test->num_pm = 1; nfit_test->dcr_idx = NUM_DCR; - nfit_test->num_dcr = 1; + nfit_test->num_dcr = 2; nfit_test->alloc = nfit_test1_alloc; nfit_test->setup = nfit_test1_setup; break; @@ -1924,6 +1962,7 @@ static __init int nfit_test_init(void) put_device(&pdev->dev); goto err_register; } + get_device(&pdev->dev); rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (rc) @@ -1942,6 +1981,10 @@ static __init int nfit_test_init(void) if (instances[i]) platform_device_unregister(&instances[i]->pdev); nfit_test_teardown(); + for (i = 0; i < NUM_NFITS; i++) + if (instances[i]) + put_device(&instances[i]->pdev.dev); + return rc; } @@ -1949,10 +1992,13 @@ static __exit void nfit_test_exit(void) { int i; - platform_driver_unregister(&nfit_test_driver); for (i = 0; i < NUM_NFITS; i++) platform_device_unregister(&instances[i]->pdev); + platform_driver_unregister(&nfit_test_driver); nfit_test_teardown(); + + for (i = 0; i < NUM_NFITS; i++) + put_device(&instances[i]->pdev.dev); class_destroy(nfit_test_dimm); } diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index f11315bedefc..6a9480c03cbd 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,6 +1,7 @@ CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address -LDFLAGS += -lpthread -lurcu +LDFLAGS += -fsanitize=address +LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ @@ -10,23 +11,25 @@ ifndef SHIFT SHIFT=3 endif +ifeq ($(BUILD), 32) + CFLAGS += -m32 + LDFLAGS += -m32 +endif + targets: mapshift $(TARGETS) main: $(OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o main idr-test: idr-test.o $(CORE_OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o idr-test multiorder: multiorder.o $(CORE_OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder clean: $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h vpath %.c ../../lib -$(OFILES): *.h */*.h generated/map-shift.h \ +$(OFILES): Makefile *.h */*.h generated/map-shift.h \ ../../include/linux/*.h \ ../../include/asm/*.h \ ../../../include/linux/radix-tree.h \ @@ -41,7 +44,7 @@ idr.c: ../../../lib/idr.c .PHONY: mapshift mapshift: - @if ! grep -qw $(SHIFT) generated/map-shift.h; then \ + @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ fi diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c index 9b09ddfe462f..99c40f3ed133 100644 --- a/tools/testing/radix-tree/benchmark.c +++ b/tools/testing/radix-tree/benchmark.c @@ -17,6 +17,9 @@ #include <time.h> #include "test.h" +#define for_each_index(i, base, order) \ + for (i = base; i < base + (1 << order); i++) + #define NSEC_PER_SEC 1000000000L static long long benchmark_iter(struct radix_tree_root *root, bool tagged) @@ -57,27 +60,176 @@ again: return nsec; } +static void benchmark_insert(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + item_insert_order(root, index, order); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n", + size, step, order, nsec); +} + +static void benchmark_tagging(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + radix_tree_tag_set(root, index, 0); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n", + size, step, order, nsec); +} + +static void benchmark_delete(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index, i; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + for_each_index(i, index, order) + item_delete(root, i); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n", + size, step, order, nsec); +} + static void benchmark_size(unsigned long size, unsigned long step, int order) { RADIX_TREE(tree, GFP_KERNEL); long long normal, tagged; - unsigned long index; - for (index = 0 ; index < size ; index += step) { - item_insert_order(&tree, index, order); - radix_tree_tag_set(&tree, index, 0); - } + benchmark_insert(&tree, size, step, order); + benchmark_tagging(&tree, size, step, order); tagged = benchmark_iter(&tree, true); normal = benchmark_iter(&tree, false); - printv(2, "Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n", - size, step, order, tagged, normal); + printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n", + size, step, order, tagged); + printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n", + size, step, order, normal); + + benchmark_delete(&tree, size, step, order); item_kill_tree(&tree); rcu_barrier(); } +static long long __benchmark_split(unsigned long index, + int old_order, int new_order) +{ + struct timespec start, finish; + long long nsec; + RADIX_TREE(tree, GFP_ATOMIC); + + item_insert_order(&tree, index, old_order); + + clock_gettime(CLOCK_MONOTONIC, &start); + radix_tree_split(&tree, index, new_order); + clock_gettime(CLOCK_MONOTONIC, &finish); + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + item_kill_tree(&tree); + + return nsec; + +} + +static void benchmark_split(unsigned long size, unsigned long step) +{ + int i, j, idx; + long long nsec = 0; + + + for (idx = 0; idx < size; idx += step) { + for (i = 3; i < 11; i++) { + for (j = 0; j < i; j++) { + nsec += __benchmark_split(idx, i, j); + } + } + } + + printv(2, "Size %8ld, step %8ld, split time %10lld ns\n", + size, step, nsec); + +} + +static long long __benchmark_join(unsigned long index, + unsigned order1, unsigned order2) +{ + unsigned long loc; + struct timespec start, finish; + long long nsec; + void *item, *item2 = item_create(index + 1, order1); + RADIX_TREE(tree, GFP_KERNEL); + + item_insert_order(&tree, index, order2); + item = radix_tree_lookup(&tree, index); + + clock_gettime(CLOCK_MONOTONIC, &start); + radix_tree_join(&tree, index + 1, order1, item2); + clock_gettime(CLOCK_MONOTONIC, &finish); + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + loc = find_item(&tree, item); + if (loc == -1) + free(item); + + item_kill_tree(&tree); + + return nsec; +} + +static void benchmark_join(unsigned long step) +{ + int i, j, idx; + long long nsec = 0; + + for (idx = 0; idx < 1 << 10; idx += step) { + for (i = 1; i < 15; i++) { + for (j = 0; j < i; j++) { + nsec += __benchmark_join(idx, i, j); + } + } + } + + printv(2, "Size %8d, step %8ld, join time %10lld ns\n", + 1 << 10, step, nsec); +} + void benchmark(void) { unsigned long size[] = {1 << 10, 1 << 20, 0}; @@ -95,4 +247,11 @@ void benchmark(void) for (c = 0; size[c]; c++) for (s = 0; step[s]; s++) benchmark_size(size[c], step[s] << 9, 9); + + for (c = 0; size[c]; c++) + for (s = 0; step[s]; s++) + benchmark_split(size[c], step[s]); + + for (s = 0; step[s]; s++) + benchmark_join(step[s]); } diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index a26098c6123d..30cd0b296f1a 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -153,6 +153,30 @@ void idr_nowait_test(void) idr_destroy(&idr); } +void idr_get_next_test(void) +{ + unsigned long i; + int nextid; + DEFINE_IDR(idr); + + int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0}; + + for(i = 0; indices[i]; i++) { + struct item *item = item_create(indices[i], 0); + assert(idr_alloc(&idr, item, indices[i], indices[i+1], + GFP_KERNEL) == indices[i]); + } + + for(i = 0, nextid = 0; indices[i]; i++) { + idr_get_next(&idr, &nextid); + assert(nextid == indices[i]); + nextid++; + } + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); +} + void idr_checks(void) { unsigned long i; @@ -202,6 +226,7 @@ void idr_checks(void) idr_alloc_test(); idr_null_test(); idr_nowait_test(); + idr_get_next_test(); } /* @@ -338,7 +363,7 @@ void ida_check_random(void) { DEFINE_IDA(ida); DECLARE_BITMAP(bitmap, 2048); - int id; + int id, err; unsigned int i; time_t s = time(NULL); @@ -352,8 +377,11 @@ void ida_check_random(void) ida_remove(&ida, bit); } else { __set_bit(bit, bitmap); - ida_pre_get(&ida, GFP_KERNEL); - assert(!ida_get_new_above(&ida, bit, &id)); + do { + ida_pre_get(&ida, GFP_KERNEL); + err = ida_get_new_above(&ida, bit, &id); + } while (err == -ENOMEM); + assert(!err); assert(id == bit); } } @@ -362,6 +390,24 @@ void ida_check_random(void) goto repeat; } +void ida_simple_get_remove_test(void) +{ + DEFINE_IDA(ida); + unsigned long i; + + for (i = 0; i < 10000; i++) { + assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i); + } + assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0); + + for (i = 0; i < 10000; i++) { + ida_simple_remove(&ida, i); + } + assert(ida_is_empty(&ida)); + + ida_destroy(&ida); +} + void ida_checks(void) { DEFINE_IDA(ida); @@ -428,15 +474,41 @@ void ida_checks(void) ida_check_max(); ida_check_conv(); ida_check_random(); + ida_simple_get_remove_test(); radix_tree_cpu_dead(1); } +static void *ida_random_fn(void *arg) +{ + rcu_register_thread(); + ida_check_random(); + rcu_unregister_thread(); + return NULL; +} + +void ida_thread_tests(void) +{ + pthread_t threads[10]; + int i; + + for (i = 0; i < ARRAY_SIZE(threads); i++) + if (pthread_create(&threads[i], NULL, ida_random_fn, NULL)) { + perror("creating ida thread"); + exit(1); + } + + while (i--) + pthread_join(threads[i], NULL); +} + int __weak main(void) { radix_tree_init(); idr_checks(); ida_checks(); + ida_thread_tests(); + radix_tree_cpu_dead(1); rcu_barrier(); if (nr_allocated) printf("nr_allocated = %d\n", nr_allocated); diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index b829127d5670..bc9a78449572 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -368,6 +368,7 @@ int main(int argc, char **argv) iteration_test(0, 10 + 90 * long_run); iteration_test(7, 10 + 90 * long_run); single_thread_tests(long_run); + ida_thread_tests(); /* Free any remaining preallocated nodes */ radix_tree_cpu_dead(0); diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index d4ff00989245..36dcf7d6945d 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -330,6 +330,34 @@ static void single_check(void) item_kill_tree(&tree); } +void radix_tree_clear_tags_test(void) +{ + unsigned long index; + struct radix_tree_node *node; + struct radix_tree_iter iter; + void **slot; + + RADIX_TREE(tree, GFP_KERNEL); + + item_insert(&tree, 0); + item_tag_set(&tree, 0, 0); + __radix_tree_lookup(&tree, 0, &node, &slot); + radix_tree_clear_tags(&tree, node, slot); + assert(item_tag_get(&tree, 0, 0) == 0); + + for (index = 0; index < 1000; index++) { + item_insert(&tree, index); + item_tag_set(&tree, index, 0); + } + + radix_tree_for_each_slot(slot, &tree, &iter, 0) { + radix_tree_clear_tags(&tree, iter.node, slot); + assert(item_tag_get(&tree, iter.index, 0) == 0); + } + + item_kill_tree(&tree); +} + void tag_check(void) { single_check(); @@ -347,4 +375,5 @@ void tag_check(void) thrash_tags(); rcu_barrier(); printv(2, "after thrash_tags: %d allocated\n", nr_allocated); + radix_tree_clear_tags_test(); } diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index b30e11d9d271..0f8220cc6166 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -36,6 +36,7 @@ void iteration_test(unsigned order, unsigned duration); void benchmark(void); void idr_checks(void); void ida_checks(void); +void ida_thread_tests(void); struct item * item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4b498265dae6..91edd0566237 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,20 +1,39 @@ LIBDIR := ../../../lib -BPFOBJ := $(LIBDIR)/bpf/bpf.o +BPFDIR := $(LIBDIR)/bpf +APIDIR := ../../../include/uapi +GENDIR := ../../../../include/generated +GENHDR := $(GENDIR)/autoconf.h -CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) +ifneq ($(wildcard $(GENHDR)),) + GENFLAGS := -DHAVE_GENHDR +endif -TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map +CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include +LDLIBS += -lcap -lelf + +TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs + +TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o TEST_PROGS := test_kmod.sh -.PHONY: all clean force +include ../lib.mk + +BPFOBJ := $(OUTPUT)/libbpf.a + +$(TEST_GEN_PROGS): $(BPFOBJ) + +.PHONY: force # force a rebuild of BPFOBJ when its dependencies are updated force: $(BPFOBJ): force - $(MAKE) -C $(dir $(BPFOBJ)) + $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ -$(test_objs): $(BPFOBJ) +CLANG ?= clang -include ../lib.mk +%.o: %.c + $(CLANG) -I. -I../../../include/uapi -I../../../../samples/bpf/ \ + -Wno-compare-distinct-pointer-types \ + -O2 -target bpf -c $< -o $@ diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h new file mode 100644 index 000000000000..19d0604f8694 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_endian.h @@ -0,0 +1,23 @@ +#ifndef __BPF_ENDIAN__ +#define __BPF_ENDIAN__ + +#include <asm/byteorder.h> + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define __bpf_ntohs(x) __builtin_bswap16(x) +# define __bpf_htons(x) __builtin_bswap16(x) +#elif __BYTE_ORDER == __BIG_ENDIAN +# define __bpf_ntohs(x) (x) +# define __bpf_htons(x) (x) +#else +# error "Fix your __BYTE_ORDER?!" +#endif + +#define bpf_htons(x) \ + (__builtin_constant_p(x) ? \ + __constant_htons(x) : __bpf_htons(x)) +#define bpf_ntohs(x) \ + (__builtin_constant_p(x) ? \ + __constant_ntohs(x) : __bpf_ntohs(x)) + +#endif diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 84a5d1823f02..20ecbaa0d85d 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -35,4 +35,11 @@ static inline unsigned int bpf_num_possible_cpus(void) return possible_cpus; } +#define __bpf_percpu_val_align __attribute__((__aligned__(8))) + +#define BPF_DECLARE_PERCPU(type, name) \ + struct { type v; /* padding */ } __bpf_percpu_val_align \ + name[bpf_num_possible_cpus()] +#define bpf_percpu(name, cpu) name[(cpu)].v + #endif /* __BPF_UTIL__ */ diff --git a/tools/testing/selftests/bpf/gnu/stubs.h b/tools/testing/selftests/bpf/gnu/stubs.h new file mode 100644 index 000000000000..719225b16626 --- /dev/null +++ b/tools/testing/selftests/bpf/gnu/stubs.h @@ -0,0 +1 @@ +/* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */ diff --git a/tools/testing/selftests/bpf/test_iptunnel_common.h b/tools/testing/selftests/bpf/test_iptunnel_common.h new file mode 100644 index 000000000000..e4cd252a1b20 --- /dev/null +++ b/tools/testing/selftests/bpf/test_iptunnel_common.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _TEST_IPTNL_COMMON_H +#define _TEST_IPTNL_COMMON_H + +#include <linux/types.h> + +#define MAX_IPTNL_ENTRIES 256U + +struct vip { + union { + __u32 v6[4]; + __u32 v4; + } daddr; + __u16 dport; + __u16 family; + __u8 protocol; +}; + +struct iptnl_info { + union { + __u32 v6[4]; + __u32 v4; + } saddr; + union { + __u32 v6[4]; + __u32 v4; + } daddr; + __u16 family; + __u8 dmac[6]; +}; + +#endif diff --git a/tools/testing/selftests/bpf/test_l4lb.c b/tools/testing/selftests/bpf/test_l4lb.c new file mode 100644 index 000000000000..1e10c9590991 --- /dev/null +++ b/tools/testing/selftests/bpf/test_l4lb.c @@ -0,0 +1,473 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include "bpf_helpers.h" +#include "test_iptunnel_common.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +static inline __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static inline u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static inline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +#define PCKT_FRAGMENTED 65343 +#define IPV4_HDR_LEN_NO_OPT 20 +#define IPV4_PLUS_ICMP_HDR 28 +#define IPV6_PLUS_ICMP_HDR 48 +#define RING_SIZE 2 +#define MAX_VIPS 12 +#define MAX_REALS 5 +#define CTL_MAP_SIZE 16 +#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE) +#define F_IPV6 (1 << 0) +#define F_HASH_NO_SRC_PORT (1 << 0) +#define F_ICMP (1 << 0) +#define F_SYN_SET (1 << 1) + +struct packet_description { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct vip_stats { + __u64 bytes; + __u64 pkts; +}; + +struct eth_hdr { + unsigned char eth_dest[ETH_ALEN]; + unsigned char eth_source[ETH_ALEN]; + unsigned short eth_proto; +}; + +struct bpf_map_def SEC("maps") vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct vip_meta), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = CH_RINGS_SIZE, +}; + +struct bpf_map_def SEC("maps") reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = MAX_REALS, +}; + +struct bpf_map_def SEC("maps") stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct vip_stats), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = CTL_MAP_SIZE, +}; + +static __always_inline __u32 get_packet_hash(struct packet_description *pckt, + bool ipv6) +{ + if (ipv6) + return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), + pckt->ports, CH_RINGS_SIZE); + else + return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); +} + +static __always_inline bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) +{ + __u32 hash = get_packet_hash(pckt, is_ipv6) % RING_SIZE; + __u32 key = RING_SIZE * vip_info->vip_num + hash; + __u32 *real_pos; + + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return false; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return false; + return true; +} + +static __always_inline int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG) + return TC_ACT_OK; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + pckt->proto = ip6h->nexthdr; + pckt->flags |= F_ICMP; + memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16); + memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16); + return TC_ACT_UNSPEC; +} + +static __always_inline int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->type != ICMP_DEST_UNREACH || + icmp_hdr->code != ICMP_FRAG_NEEDED) + return TC_ACT_OK; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + pckt->proto = iph->protocol; + pckt->flags |= F_ICMP; + pckt->src = iph->daddr; + pckt->dst = iph->saddr; + return TC_ACT_UNSPEC; +} + +static __always_inline bool parse_udp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return false; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = udp->source; + pckt->port16[1] = udp->dest; + } else { + pckt->port16[0] = udp->dest; + pckt->port16[1] = udp->source; + } + return true; +} + +static __always_inline bool parse_tcp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return false; + + if (tcp->syn) + pckt->flags |= F_SYN_SET; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = tcp->source; + pckt->port16[1] = tcp->dest; + } else { + pckt->port16[0] = tcp->dest; + pckt->port16[1] = tcp->source; + } + return true; +} + +static __always_inline int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct __sk_buff *skb) +{ + void *pkt_start = (void *)(long)skb->data; + struct packet_description pckt = {}; + struct eth_hdr *eth = pkt_start; + struct bpf_tunnel_key tkey = {}; + struct vip_stats *data_stats; + struct real_definition *dst; + struct vip_meta *vip_info; + struct ctl_value *cval; + __u32 v4_intf_pos = 1; + __u32 v6_intf_pos = 2; + struct ipv6hdr *ip6h; + struct vip vip = {}; + struct iphdr *iph; + int tun_flag = 0; + __u16 pkt_bytes; + __u64 iph_len; + __u32 ifindex; + __u8 protocol; + __u32 vip_num; + int action; + + tkey.tunnel_ttl = 64; + if (is_ipv6) { + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + + iph_len = sizeof(struct ipv6hdr); + protocol = ip6h->nexthdr; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(ip6h->payload_len); + off += iph_len; + if (protocol == IPPROTO_FRAGMENT) { + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_ICMPV6) { + action = parse_icmpv6(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV6_PLUS_ICMP_HDR; + } else { + memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16); + memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16); + } + } else { + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + + protocol = iph->protocol; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(iph->tot_len); + off += IPV4_HDR_LEN_NO_OPT; + + if (iph->frag_off & PCKT_FRAGMENTED) + return TC_ACT_SHOT; + if (protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV4_PLUS_ICMP_HDR; + } else { + pckt.src = iph->saddr; + pckt.dst = iph->daddr; + } + } + protocol = pckt.proto; + + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else { + return TC_ACT_SHOT; + } + + if (is_ipv6) + memcpy(vip.daddr.v6, pckt.dstv6, 16); + else + vip.daddr.v4 = pckt.dst; + + vip.dport = pckt.port16[1]; + vip.protocol = pckt.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.dport = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return TC_ACT_SHOT; + pckt.port16[1] = 0; + } + + if (vip_info->flags & F_HASH_NO_SRC_PORT) + pckt.port16[0] = 0; + + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) + return TC_ACT_SHOT; + + if (dst->flags & F_IPV6) { + cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + memcpy(tkey.remote_ipv6, dst->dstv6, 16); + tun_flag = BPF_F_TUNINFO_IPV6; + } else { + cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + tkey.remote_ipv4 = dst->dst; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return TC_ACT_SHOT; + data_stats->pkts++; + data_stats->bytes += pkt_bytes; + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); + *(u32 *)eth->eth_dest = tkey.remote_ipv4; + return bpf_redirect(ifindex, 0); +} + +SEC("l4lb-demo") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return TC_ACT_SHOT; + eth_proto = eth->eth_proto; + if (eth_proto == bpf_htons(ETH_P_IP)) + return process_packet(data, nh_off, data_end, false, ctx); + else if (eth_proto == bpf_htons(ETH_P_IPV6)) + return process_packet(data, nh_off, data_end, true, ctx); + else + return TC_ACT_SHOT; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index 00b0aff56e2e..8c10c9180c1a 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -22,7 +22,7 @@ #include "bpf_util.h" #define LOCAL_FREE_TARGET (128) -#define PERCPU_FREE_TARGET (16) +#define PERCPU_FREE_TARGET (4) static int nr_cpus; @@ -191,12 +191,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) int next_cpu = 0; if (map_flags & BPF_F_NO_COMMON_LRU) - /* Ther percpu lru list (i.e each cpu has its own LRU - * list) does not have a local free list. Hence, - * it will only free old nodes till there is no free - * from the LRU list. Hence, this test does not apply - * to BPF_F_NO_COMMON_LRU - */ + /* This test is only applicable to common LRU list */ return; printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, @@ -227,7 +222,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, - BPF_NOEXIST)); + BPF_NOEXIST)); } /* Insert 1+tgt_free to 2*tgt_free @@ -273,12 +268,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) int next_cpu = 0; if (map_flags & BPF_F_NO_COMMON_LRU) - /* Ther percpu lru list (i.e each cpu has its own LRU - * list) does not have a local free list. Hence, - * it will only free old nodes till there is no free - * from the LRU list. Hence, this test does not apply - * to BPF_F_NO_COMMON_LRU - */ + /* This test is only applicable to common LRU list */ return; printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, @@ -290,11 +280,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) assert(batch_size * 2 == tgt_free); map_size = tgt_free + batch_size; - if (map_flags & BPF_F_NO_COMMON_LRU) - lru_map_fd = create_map(map_type, map_flags, - map_size * nr_cpus); - else - lru_map_fd = create_map(map_type, map_flags, map_size); + lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size); @@ -341,7 +327,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); assert(value[0] == 4321); assert(!bpf_map_update_elem(expected_map_fd, &key, value, - BPF_NOEXIST)); + BPF_NOEXIST)); } value[0] = 1234; @@ -361,7 +347,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, - BPF_NOEXIST)); + BPF_NOEXIST)); } assert(map_equal(lru_map_fd, expected_map_fd)); @@ -387,6 +373,10 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) unsigned int map_size; int next_cpu = 0; + if (map_flags & BPF_F_NO_COMMON_LRU) + /* This test is only applicable to common LRU list */ + return; + printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, map_flags); @@ -396,11 +386,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) assert(batch_size * 2 == tgt_free); map_size = tgt_free * 2; - if (map_flags & BPF_F_NO_COMMON_LRU) - lru_map_fd = create_map(map_type, map_flags, - map_size * nr_cpus); - else - lru_map_fd = create_map(map_type, map_flags, map_size); + lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size); @@ -419,7 +405,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, - BPF_NOEXIST)); + BPF_NOEXIST)); } /* Add 1+2*tgt_free to tgt_free*5/2 @@ -431,7 +417,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, - BPF_NOEXIST)); + BPF_NOEXIST)); } assert(map_equal(lru_map_fd, expected_map_fd)); @@ -491,7 +477,7 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, - BPF_NOEXIST)); + BPF_NOEXIST)); } assert(map_equal(lru_map_fd, expected_map_fd)); @@ -566,6 +552,65 @@ static void test_lru_sanity5(int map_type, int map_flags) printf("Pass\n"); } +/* Test list rotation for BPF_F_NO_COMMON_LRU map */ +static void test_lru_sanity6(int map_type, int map_flags, int tgt_free) +{ + int lru_map_fd, expected_map_fd; + unsigned long long key, value[nr_cpus]; + unsigned int map_size = tgt_free * 2; + int next_cpu = 0; + + if (!(map_flags & BPF_F_NO_COMMON_LRU)) + return; + + printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, + map_flags); + + assert(sched_next_online(0, &next_cpu) != -1); + + expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size); + assert(expected_map_fd != -1); + + lru_map_fd = create_map(map_type, map_flags, map_size * nr_cpus); + assert(lru_map_fd != -1); + + value[0] = 1234; + + for (key = 1; key <= tgt_free; key++) { + assert(!bpf_map_update_elem(lru_map_fd, &key, value, + BPF_NOEXIST)); + assert(!bpf_map_update_elem(expected_map_fd, &key, value, + BPF_NOEXIST)); + } + + for (; key <= tgt_free * 2; key++) { + unsigned long long stable_key; + + /* Make ref bit sticky for key: [1, tgt_free] */ + for (stable_key = 1; stable_key <= tgt_free; stable_key++) { + /* Mark the ref bit */ + assert(!bpf_map_lookup_elem(lru_map_fd, &stable_key, + value)); + } + assert(!bpf_map_update_elem(lru_map_fd, &key, value, + BPF_NOEXIST)); + } + + for (; key <= tgt_free * 3; key++) { + assert(!bpf_map_update_elem(lru_map_fd, &key, value, + BPF_NOEXIST)); + assert(!bpf_map_update_elem(expected_map_fd, &key, value, + BPF_NOEXIST)); + } + + assert(map_equal(lru_map_fd, expected_map_fd)); + + close(expected_map_fd); + close(lru_map_fd); + + printf("Pass\n"); +} + int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; @@ -593,6 +638,7 @@ int main(int argc, char **argv) test_lru_sanity3(map_types[t], map_flags[f], tgt_free); test_lru_sanity4(map_types[t], map_flags[f], tgt_free); test_lru_sanity5(map_types[t], map_flags[f]); + test_lru_sanity6(map_types[t], map_flags[f], tgt_free); printf("\n"); } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index cada17ac00b8..93314524de0d 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -28,7 +28,7 @@ static int map_flags; static void test_hashmap(int task, void *data) { - long long key, next_key, value; + long long key, next_key, first_key, value; int fd; fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), @@ -80,18 +80,22 @@ static void test_hashmap(int task, void *data) assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0); key = 2; assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); - key = 1; - assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); + key = 3; + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + errno == E2BIG); /* Check that key = 0 doesn't exist. */ key = 0; assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); /* Iterate over two elements. */ + assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 && + (first_key == 1 || first_key == 2)); assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 && - (next_key == 1 || next_key == 2)); + (next_key == first_key)); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && - (next_key == 1 || next_key == 2)); + (next_key == 1 || next_key == 2) && + (next_key != first_key)); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 && errno == ENOENT); @@ -104,29 +108,49 @@ static void test_hashmap(int task, void *data) key = 0; /* Check that map is empty. */ + assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 && + errno == ENOENT); assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 && errno == ENOENT); close(fd); } +static void test_hashmap_sizes(int task, void *data) +{ + int fd, i, j; + + for (i = 1; i <= 512; i <<= 1) + for (j = 1; j <= 1 << 18; j <<= 1) { + fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j, + 2, map_flags); + if (fd < 0) { + printf("Failed to create hashmap key=%d value=%d '%s'\n", + i, j, strerror(errno)); + exit(1); + } + close(fd); + usleep(10); /* give kernel time to destroy */ + } +} + static void test_hashmap_percpu(int task, void *data) { unsigned int nr_cpus = bpf_num_possible_cpus(); - long long value[nr_cpus]; - long long key, next_key; + BPF_DECLARE_PERCPU(long, value); + long long key, next_key, first_key; int expected_key_mask = 0; int fd, i; fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key), - sizeof(value[0]), 2, map_flags); + sizeof(bpf_percpu(value, 0)), 2, map_flags); if (fd < 0) { printf("Failed to create hashmap '%s'!\n", strerror(errno)); exit(1); } for (i = 0; i < nr_cpus; i++) - value[i] = i + 100; + bpf_percpu(value, i) = i + 100; key = 1; /* Insert key=1 element. */ @@ -146,8 +170,9 @@ static void test_hashmap_percpu(int task, void *data) /* Check that key=1 can be found. Value could be 0 if the lookup * was run from a different CPU. */ - value[0] = 1; - assert(bpf_map_lookup_elem(fd, &key, value) == 0 && value[0] == 100); + bpf_percpu(value, 0) = 1; + assert(bpf_map_lookup_elem(fd, &key, value) == 0 && + bpf_percpu(value, 0) == 100); key = 2; /* Check that key=2 is not found. */ @@ -174,14 +199,20 @@ static void test_hashmap_percpu(int task, void *data) assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); /* Iterate over two elements. */ + assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 && + ((expected_key_mask & first_key) == first_key)); while (!bpf_map_get_next_key(fd, &key, &next_key)) { + if (first_key) { + assert(next_key == first_key); + first_key = 0; + } assert((expected_key_mask & next_key) == next_key); expected_key_mask &= ~next_key; assert(bpf_map_lookup_elem(fd, &next_key, value) == 0); for (i = 0; i < nr_cpus; i++) - assert(value[i] == i + 100); + assert(bpf_percpu(value, i) == i + 100); key = next_key; } @@ -200,6 +231,8 @@ static void test_hashmap_percpu(int task, void *data) key = 0; /* Check that map is empty. */ + assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 && + errno == ENOENT); assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 && errno == ENOENT); @@ -245,6 +278,8 @@ static void test_arraymap(int task, void *data) assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); /* Iterate over two elements. */ + assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 && + next_key == 0); assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 && next_key == 0); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && @@ -262,34 +297,36 @@ static void test_arraymap(int task, void *data) static void test_arraymap_percpu(int task, void *data) { unsigned int nr_cpus = bpf_num_possible_cpus(); + BPF_DECLARE_PERCPU(long, values); int key, next_key, fd, i; - long values[nr_cpus]; fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), - sizeof(values[0]), 2, 0); + sizeof(bpf_percpu(values, 0)), 2, 0); if (fd < 0) { printf("Failed to create arraymap '%s'!\n", strerror(errno)); exit(1); } for (i = 0; i < nr_cpus; i++) - values[i] = i + 100; + bpf_percpu(values, i) = i + 100; key = 1; /* Insert key=1 element. */ assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0); - values[0] = 0; + bpf_percpu(values, 0) = 0; assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 && errno == EEXIST); /* Check that key=1 can be found. */ - assert(bpf_map_lookup_elem(fd, &key, values) == 0 && values[0] == 100); + assert(bpf_map_lookup_elem(fd, &key, values) == 0 && + bpf_percpu(values, 0) == 100); key = 0; /* Check that key=0 is also found and zero initialized. */ assert(bpf_map_lookup_elem(fd, &key, values) == 0 && - values[0] == 0 && values[nr_cpus - 1] == 0); + bpf_percpu(values, 0) == 0 && + bpf_percpu(values, nr_cpus - 1) == 0); /* Check that key=2 cannot be inserted due to max_entries limit. */ key = 2; @@ -300,6 +337,8 @@ static void test_arraymap_percpu(int task, void *data) assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT); /* Iterate over two elements. */ + assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 && + next_key == 0); assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 && next_key == 0); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && @@ -317,12 +356,15 @@ static void test_arraymap_percpu(int task, void *data) static void test_arraymap_percpu_many_keys(void) { unsigned int nr_cpus = bpf_num_possible_cpus(); - unsigned int nr_keys = 20000; - long values[nr_cpus]; + BPF_DECLARE_PERCPU(long, values); + /* nr_keys is not too large otherwise the test stresses percpu + * allocator more than anything else + */ + unsigned int nr_keys = 2000; int key, fd, i; fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), - sizeof(values[0]), nr_keys, 0); + sizeof(bpf_percpu(values, 0)), nr_keys, 0); if (fd < 0) { printf("Failed to create per-cpu arraymap '%s'!\n", strerror(errno)); @@ -330,19 +372,19 @@ static void test_arraymap_percpu_many_keys(void) } for (i = 0; i < nr_cpus; i++) - values[i] = i + 10; + bpf_percpu(values, i) = i + 10; for (key = 0; key < nr_keys; key++) assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0); for (key = 0; key < nr_keys; key++) { for (i = 0; i < nr_cpus; i++) - values[i] = 0; + bpf_percpu(values, i) = 0; assert(bpf_map_lookup_elem(fd, &key, values) == 0); for (i = 0; i < nr_cpus; i++) - assert(values[i] == i + 10); + assert(bpf_percpu(values, i) == i + 10); } close(fd); @@ -378,6 +420,8 @@ static void test_map_large(void) errno == E2BIG); /* Iterate through all elements. */ + assert(bpf_map_get_next_key(fd, NULL, &key) == 0); + key.c = -1; for (i = 0; i < MAP_SIZE; i++) assert(bpf_map_get_next_key(fd, &key, &key) == 0); assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); @@ -419,6 +463,7 @@ static void test_map_stress(void) { run_parallel(100, test_hashmap, NULL); run_parallel(100, test_hashmap_percpu, NULL); + run_parallel(100, test_hashmap_sizes, NULL); run_parallel(100, test_arraymap, NULL); run_parallel(100, test_arraymap_percpu, NULL); @@ -476,6 +521,7 @@ static void test_map_parallel(void) errno == EEXIST); /* Check that all elements were inserted. */ + assert(bpf_map_get_next_key(fd, NULL, &key) == 0); key = -1; for (i = 0; i < MAP_SIZE; i++) assert(bpf_map_get_next_key(fd, &key, &key) == 0); @@ -495,6 +541,7 @@ static void test_map_parallel(void) /* Nothing should be left. */ key = -1; + assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT); assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); } diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c new file mode 100644 index 000000000000..39387bb7e08c --- /dev/null +++ b/tools/testing/selftests/bpf/test_pkt_access.c @@ -0,0 +1,64 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/pkt_cls.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define barrier() __asm__ __volatile__("": : :"memory") +int _version SEC("version") = 1; + +SEC("test1") +int process(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = (struct ethhdr *)(data); + struct tcphdr *tcp = NULL; + __u8 proto = 255; + __u64 ihl_len; + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)(eth + 1); + + if (iph + 1 > data_end) + return TC_ACT_SHOT; + ihl_len = iph->ihl * 4; + proto = iph->protocol; + tcp = (struct tcphdr *)((void *)(iph) + ihl_len); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(eth + 1); + + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + ihl_len = sizeof(*ip6h); + proto = ip6h->nexthdr; + tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len); + } + + if (tcp) { + if (((void *)(tcp) + 20) > data_end || proto != 6) + return TC_ACT_SHOT; + barrier(); /* to force ordering of checks */ + if (((void *)(tcp) + 18) > data_end) + return TC_ACT_SHOT; + if (tcp->urg_ptr == 123) + return TC_ACT_OK; + } + + return TC_ACT_UNSPEC; +} diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c new file mode 100644 index 000000000000..b59f5ed4ae40 --- /dev/null +++ b/tools/testing/selftests/bpf/test_progs.c @@ -0,0 +1,299 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> + +#include <linux/types.h> +typedef __u16 __sum16; +#include <arpa/inet.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> + +#include <sys/wait.h> +#include <sys/resource.h> + +#include <linux/bpf.h> +#include <linux/err.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include "test_iptunnel_common.h" +#include "bpf_util.h" +#include "bpf_endian.h" + +static int error_cnt, pass_cnt; + +#define MAGIC_BYTES 123 + +/* ipv4 test vector */ +static struct { + struct ethhdr eth; + struct iphdr iph; + struct tcphdr tcp; +} __packed pkt_v4 = { + .eth.h_proto = bpf_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = 6, + .iph.tot_len = bpf_htons(MAGIC_BYTES), + .tcp.urg_ptr = 123, +}; + +/* ipv6 test vector */ +static struct { + struct ethhdr eth; + struct ipv6hdr iph; + struct tcphdr tcp; +} __packed pkt_v6 = { + .eth.h_proto = bpf_htons(ETH_P_IPV6), + .iph.nexthdr = 6, + .iph.payload_len = bpf_htons(MAGIC_BYTES), + .tcp.urg_ptr = 123, +}; + +#define CHECK(condition, tag, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + error_cnt++; \ + printf("%s:FAIL:%s ", __func__, tag); \ + printf(format); \ + } else { \ + pass_cnt++; \ + printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\ + } \ +}) + +static int bpf_prog_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd) +{ + struct bpf_program *prog; + struct bpf_object *obj; + int err; + + obj = bpf_object__open(file); + if (IS_ERR(obj)) { + error_cnt++; + return -ENOENT; + } + + prog = bpf_program__next(NULL, obj); + if (!prog) { + bpf_object__close(obj); + error_cnt++; + return -ENOENT; + } + + bpf_program__set_type(prog, type); + err = bpf_object__load(obj); + if (err) { + bpf_object__close(obj); + error_cnt++; + return -EINVAL; + } + + *pobj = obj; + *prog_fd = bpf_program__fd(prog); + return 0; +} + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) { + printf("%s:FAIL:map '%s' not found\n", test, name); + error_cnt++; + return -1; + } + return bpf_map__fd(map); +} + +static void test_pkt_access(void) +{ + const char *file = "./test_pkt_access.o"; + struct bpf_object *obj; + __u32 duration, retval; + int err, prog_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (err) + return; + + err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || errno || retval, "ipv4", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + CHECK(err || errno || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + bpf_object__close(obj); +} + +static void test_xdp(void) +{ + struct vip key4 = {.protocol = 6, .family = AF_INET}; + struct vip key6 = {.protocol = 6, .family = AF_INET6}; + struct iptnl_info value4 = {.family = AF_INET}; + struct iptnl_info value6 = {.family = AF_INET6}; + const char *file = "./test_xdp.o"; + struct bpf_object *obj; + char buf[128]; + struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr); + struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); + __u32 duration, retval, size; + int err, prog_fd, map_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (err) + return; + + map_fd = bpf_find_map(__func__, obj, "vip2tnl"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &key4, &value4, 0); + bpf_map_update_elem(map_fd, &key6, &value6, 0); + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + + CHECK(err || errno || retval != XDP_TX || size != 74 || + iph->protocol != IPPROTO_IPIP, "ipv4", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != XDP_TX || size != 114 || + iph6->nexthdr != IPPROTO_IPV6, "ipv6", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size); +out: + bpf_object__close(obj); +} + +#define MAGIC_VAL 0x1234 +#define NUM_ITER 100000 +#define VIP_NUM 5 + +static void test_l4lb(void) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + const char *file = "./test_l4lb.o"; + struct vip key = {.protocol = 6}; + struct vip_meta { + __u32 flags; + __u32 vip_num; + } value = {.vip_num = VIP_NUM}; + __u32 stats_key = VIP_NUM; + struct vip_stats { + __u64 bytes; + __u64 pkts; + } stats[nr_cpus]; + struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; + } real_def = {.dst = MAGIC_VAL}; + __u32 ch_key = 11, real_num = 3; + __u32 duration, retval, size; + int err, i, prog_fd, map_fd; + __u64 bytes = 0, pkts = 0; + struct bpf_object *obj; + char buf[128]; + u32 *magic = (u32 *)buf; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (err) + return; + + map_fd = bpf_find_map(__func__, obj, "vip_map"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &key, &value, 0); + + map_fd = bpf_find_map(__func__, obj, "ch_rings"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); + + map_fd = bpf_find_map(__func__, obj, "reals"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &real_num, &real_def, 0); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 || + *magic != MAGIC_VAL, "ipv4", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 || + *magic != MAGIC_VAL, "ipv6", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + map_fd = bpf_find_map(__func__, obj, "stats"); + if (map_fd < 0) + goto out; + bpf_map_lookup_elem(map_fd, &stats_key, stats); + for (i = 0; i < nr_cpus; i++) { + bytes += stats[i].bytes; + pkts += stats[i].pkts; + } + if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { + error_cnt++; + printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts); + } +out: + bpf_object__close(obj); +} + +static void test_tcp_estats(void) +{ + const char *file = "./test_tcp_estats.o"; + int err, prog_fd; + struct bpf_object *obj; + __u32 duration = 0; + + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + CHECK(err, "", "err %d errno %d\n", err, errno); + if (err) + return; + + bpf_object__close(obj); +} + +int main(void) +{ + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + setrlimit(RLIMIT_MEMLOCK, &rinf); + + test_pkt_access(); + test_xdp(); + test_l4lb(); + test_tcp_estats(); + + printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); + return 0; +} diff --git a/tools/testing/selftests/bpf/test_tcp_estats.c b/tools/testing/selftests/bpf/test_tcp_estats.c new file mode 100644 index 000000000000..bee3bbecc0c4 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcp_estats.c @@ -0,0 +1,258 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +/* This program shows clang/llvm is able to generate code pattern + * like: + * _tcp_send_active_reset: + * 0: bf 16 00 00 00 00 00 00 r6 = r1 + * ...... + * 335: b7 01 00 00 0f 00 00 00 r1 = 15 + * 336: 05 00 48 00 00 00 00 00 goto 72 + * + * LBB0_3: + * 337: b7 01 00 00 01 00 00 00 r1 = 1 + * 338: 63 1a d0 ff 00 00 00 00 *(u32 *)(r10 - 48) = r1 + * 408: b7 01 00 00 03 00 00 00 r1 = 3 + * + * LBB0_4: + * 409: 71 a2 fe ff 00 00 00 00 r2 = *(u8 *)(r10 - 2) + * 410: bf a7 00 00 00 00 00 00 r7 = r10 + * 411: 07 07 00 00 b8 ff ff ff r7 += -72 + * 412: bf 73 00 00 00 00 00 00 r3 = r7 + * 413: 0f 13 00 00 00 00 00 00 r3 += r1 + * 414: 73 23 2d 00 00 00 00 00 *(u8 *)(r3 + 45) = r2 + * + * From the above code snippet, the code generated by the compiler + * is reasonable. The "r1" is assigned to different values in basic + * blocks "_tcp_send_active_reset" and "LBB0_3", and used in "LBB0_4". + * The verifier should be able to handle such code patterns. + */ +#include <string.h> +#include <linux/bpf.h> +#include <linux/ipv6.h> +#include <linux/version.h> +#include <sys/socket.h> +#include "bpf_helpers.h" + +#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define TCP_ESTATS_MAGIC 0xBAADBEEF + +/* This test case needs "sock" and "pt_regs" data structure. + * Recursively, "sock" needs "sock_common" and "inet_sock". + * However, this is a unit test case only for + * verifier purpose without bpf program execution. + * We can safely mock much simpler data structures, basically + * only taking the necessary fields from kernel headers. + */ +typedef __u32 __bitwise __portpair; +typedef __u64 __bitwise __addrpair; + +struct sock_common { + unsigned short skc_family; + union { + __addrpair skc_addrpair; + struct { + __be32 skc_daddr; + __be32 skc_rcv_saddr; + }; + }; + union { + __portpair skc_portpair; + struct { + __be16 skc_dport; + __u16 skc_num; + }; + }; + struct in6_addr skc_v6_daddr; + struct in6_addr skc_v6_rcv_saddr; +}; + +struct sock { + struct sock_common __sk_common; +#define sk_family __sk_common.skc_family +#define sk_v6_daddr __sk_common.skc_v6_daddr +#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr +}; + +struct inet_sock { + struct sock sk; +#define inet_daddr sk.__sk_common.skc_daddr +#define inet_dport sk.__sk_common.skc_dport + __be32 inet_saddr; + __be16 inet_sport; +}; + +struct pt_regs { + long di; +}; + +static inline struct inet_sock *inet_sk(const struct sock *sk) +{ + return (struct inet_sock *)sk; +} + +/* Define various data structures for state recording. + * Some fields are not used due to test simplification. + */ +enum tcp_estats_addrtype { + TCP_ESTATS_ADDRTYPE_IPV4 = 1, + TCP_ESTATS_ADDRTYPE_IPV6 = 2 +}; + +enum tcp_estats_event_type { + TCP_ESTATS_ESTABLISH, + TCP_ESTATS_PERIODIC, + TCP_ESTATS_TIMEOUT, + TCP_ESTATS_RETRANSMIT_TIMEOUT, + TCP_ESTATS_RETRANSMIT_OTHER, + TCP_ESTATS_SYN_RETRANSMIT, + TCP_ESTATS_SYNACK_RETRANSMIT, + TCP_ESTATS_TERM, + TCP_ESTATS_TX_RESET, + TCP_ESTATS_RX_RESET, + TCP_ESTATS_WRITE_TIMEOUT, + TCP_ESTATS_CONN_TIMEOUT, + TCP_ESTATS_ACK_LATENCY, + TCP_ESTATS_NEVENTS, +}; + +struct tcp_estats_event { + int pid; + int cpu; + unsigned long ts; + unsigned int magic; + enum tcp_estats_event_type event_type; +}; + +/* The below data structure is packed in order for + * llvm compiler to generate expected code. + */ +struct tcp_estats_conn_id { + unsigned int localaddressType; + struct { + unsigned char data[16]; + } localaddress; + struct { + unsigned char data[16]; + } remaddress; + unsigned short localport; + unsigned short remport; +} __attribute__((__packed__)); + +struct tcp_estats_basic_event { + struct tcp_estats_event event; + struct tcp_estats_conn_id conn_id; +}; + +struct bpf_map_def SEC("maps") ev_record_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcp_estats_basic_event), + .max_entries = 1024, +}; + +struct dummy_tracepoint_args { + unsigned long long pad; + struct sock *sock; +}; + +static __always_inline void tcp_estats_ev_init(struct tcp_estats_event *event, + enum tcp_estats_event_type type) +{ + event->magic = TCP_ESTATS_MAGIC; + event->ts = bpf_ktime_get_ns(); + event->event_type = type; +} + +static __always_inline void unaligned_u32_set(unsigned char *to, __u8 *from) +{ + to[0] = _(from[0]); + to[1] = _(from[1]); + to[2] = _(from[2]); + to[3] = _(from[3]); +} + +static __always_inline void conn_id_ipv4_init(struct tcp_estats_conn_id *conn_id, + __be32 *saddr, __be32 *daddr) +{ + conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV4; + + unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr); + unaligned_u32_set(conn_id->remaddress.data, (__u8 *)daddr); +} + +static __always_inline void conn_id_ipv6_init(struct tcp_estats_conn_id *conn_id, + __be32 *saddr, __be32 *daddr) +{ + conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV6; + + unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr); + unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32), + (__u8 *)(saddr + 1)); + unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 2, + (__u8 *)(saddr + 2)); + unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 3, + (__u8 *)(saddr + 3)); + + unaligned_u32_set(conn_id->remaddress.data, + (__u8 *)(daddr)); + unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32), + (__u8 *)(daddr + 1)); + unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 2, + (__u8 *)(daddr + 2)); + unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 3, + (__u8 *)(daddr + 3)); +} + +static __always_inline void tcp_estats_conn_id_init(struct tcp_estats_conn_id *conn_id, + struct sock *sk) +{ + conn_id->localport = _(inet_sk(sk)->inet_sport); + conn_id->remport = _(inet_sk(sk)->inet_dport); + + if (_(sk->sk_family) == AF_INET6) + conn_id_ipv6_init(conn_id, + sk->sk_v6_rcv_saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32); + else + conn_id_ipv4_init(conn_id, + &inet_sk(sk)->inet_saddr, + &inet_sk(sk)->inet_daddr); +} + +static __always_inline void tcp_estats_init(struct sock *sk, + struct tcp_estats_event *event, + struct tcp_estats_conn_id *conn_id, + enum tcp_estats_event_type type) +{ + tcp_estats_ev_init(event, type); + tcp_estats_conn_id_init(conn_id, sk); +} + +static __always_inline void send_basic_event(struct sock *sk, + enum tcp_estats_event_type type) +{ + struct tcp_estats_basic_event ev; + __u32 key = bpf_get_prandom_u32(); + + memset(&ev, 0, sizeof(ev)); + tcp_estats_init(sk, &ev.event, &ev.conn_id, type); + bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + if (!arg->sock) + return 0; + + send_basic_event(arg->sock, TCP_ESTATS_TX_RESET); + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index e1f5b9eea1e8..3773562056da 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8,6 +8,8 @@ * License as published by the Free Software Foundation. */ +#include <asm/types.h> +#include <linux/types.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -28,6 +30,14 @@ #include <bpf/bpf.h> +#ifdef HAVE_GENHDR +# include "autoconf.h" +#else +# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__) +# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1 +# endif +#endif + #include "../../../include/linux/filter.h" #ifndef ARRAY_SIZE @@ -36,6 +46,9 @@ #define MAX_INSNS 512 #define MAX_FIXUPS 8 +#define MAX_NR_MAPS 4 + +#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) struct bpf_test { const char *descr; @@ -43,6 +56,7 @@ struct bpf_test { int fixup_map1[MAX_FIXUPS]; int fixup_map2[MAX_FIXUPS]; int fixup_prog[MAX_FIXUPS]; + int fixup_map_in_map[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; enum { @@ -51,6 +65,7 @@ struct bpf_test { REJECT } result, result_unpriv; enum bpf_prog_type prog_type; + uint8_t flags; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -176,6 +191,86 @@ static struct bpf_test tests[] = { .result = REJECT, }, { + "test6 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "test7 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), + BPF_RAW_INSN(0, 0, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "test8 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 1, 1), + BPF_RAW_INSN(0, 0, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "uses reserved fields", + .result = REJECT, + }, + { + "test9 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), + BPF_RAW_INSN(0, 0, 0, 1, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, + }, + { + "test10 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), + BPF_RAW_INSN(0, BPF_REG_1, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, + }, + { + "test11 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), + BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, + }, + { + "test12 ld_imm64", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1), + BPF_RAW_INSN(0, 0, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "not pointing to valid bpf_map", + .result = REJECT, + }, + { + "test13 ld_imm64", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1), + BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, + }, + { "no bpf_exit", .insns = { BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2), @@ -316,6 +411,30 @@ static struct bpf_test tests[] = { .result = REJECT, }, { + "invalid fp arithmetic", + /* If this gets ever changed, make sure JITs can deal with it. */ + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 8), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 pointer arithmetic", + .result_unpriv = REJECT, + .errstr = "R1 invalid mem access", + .result = REJECT, + }, + { + "non-invalid fp arithmetic", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { "invalid argument register", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, @@ -757,6 +876,9 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, vlan_tci)), BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), BPF_EXIT_INSN(), }, .result = ACCEPT, @@ -1783,6 +1905,20 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { + "unpriv: adding of fp", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "pointer arithmetic prohibited", + .result_unpriv = REJECT, + .errstr = "R1 invalid mem access", + .result = REJECT, + }, + { "unpriv: cmp of stack pointer", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -1796,16 +1932,22 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "unpriv: obfuscate stack pointer", + "stack pointer arithmetic", .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1), + BPF_ST_MEM(0, BPF_REG_2, 4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_ST_MEM(0, BPF_REG_2, 4, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R2 pointer arithmetic", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -2430,6 +2572,49 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "direct packet access: test15 (spill with xadd)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_5, 4096), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R2 invalid mem access 'inv'", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test16 (arith on data_end)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 16), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { "helper access to packet: test1, valid packet_ptr range", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -2932,6 +3117,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "valid map access into an array with a variable", @@ -2955,6 +3141,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "valid map access into an array with a signed variable", @@ -2982,6 +3169,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a constant", @@ -3023,6 +3211,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is outside of the array range", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a variable", @@ -3046,6 +3235,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with no floor check", @@ -3072,6 +3262,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a invalid max check", @@ -3098,6 +3289,7 @@ static struct bpf_test tests[] = { .errstr = "invalid access to map value, value_size=48 off=44 size=8", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a invalid max check", @@ -3127,6 +3319,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "multiple registers share map_lookup_elem result", @@ -3250,6 +3443,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "constant register |= constant should keep constant type", @@ -3416,6 +3610,26 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_LWT_XMIT, }, { + "overlapping checks for direct packet access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_LWT_XMIT, + }, + { "invalid access of tc_classid for LWT_IN", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, @@ -3959,7 +4173,208 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, }, { - "map element value (adjusted) is preserved across register spilling", + "map element value or null is marked on register spilling", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result = ACCEPT, + .result_unpriv = REJECT, + }, + { + "map element value store of cleared call register", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R1 !read_ok", + .errstr = "R1 !read_ok", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value with unaligned store", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43), + BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33), + BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23), + BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22), + BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23), + BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .result = ACCEPT, + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "map element value with unaligned load", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .result = ACCEPT, + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "map element value illegal alu op, 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 3", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 4", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 5", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_IMM(BPF_REG_3, 4096), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .errstr = "R0 invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value is preserved across register spilling", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), @@ -3981,6 +4396,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result = ACCEPT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "helper access to variable memory: stack, bitwise AND + JMP, correct bounds", @@ -4419,6 +4835,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid range check", @@ -4450,6 +4867,76 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "map in map access", + .insns = { + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_in_map = { 3 }, + .result = ACCEPT, + }, + { + "invalid inner map pointer", + .insns = { + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_in_map = { 3 }, + .errstr = "R1 type=inv expected=map_ptr", + .errstr_unpriv = "R1 pointer arithmetic prohibited", + .result = REJECT, + }, + { + "forgot null checking on the inner map pointer", + .insns = { + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_in_map = { 3 }, + .errstr = "R1 type=map_value_or_null expected=map_ptr", + .result = REJECT, } }; @@ -4487,55 +4974,90 @@ static int create_prog_array(void) return fd; } +static int create_map_in_map(void) +{ + int inner_map_fd, outer_map_fd; + + inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), + sizeof(int), 1, 0); + if (inner_map_fd < 0) { + printf("Failed to create array '%s'!\n", strerror(errno)); + return inner_map_fd; + } + + outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, + sizeof(int), inner_map_fd, 1, 0); + if (outer_map_fd < 0) + printf("Failed to create array of maps '%s'!\n", + strerror(errno)); + + close(inner_map_fd); + + return outer_map_fd; +} + static char bpf_vlog[32768]; static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, - int *fd_f1, int *fd_f2, int *fd_f3) + int *map_fds) { int *fixup_map1 = test->fixup_map1; int *fixup_map2 = test->fixup_map2; int *fixup_prog = test->fixup_prog; + int *fixup_map_in_map = test->fixup_map_in_map; /* Allocating HTs with 1 elem is fine here, since we only test * for verifier and not do a runtime lookup, so the only thing * that really matters is value size in this case. */ if (*fixup_map1) { - *fd_f1 = create_map(sizeof(long long), 1); + map_fds[0] = create_map(sizeof(long long), 1); do { - prog[*fixup_map1].imm = *fd_f1; + prog[*fixup_map1].imm = map_fds[0]; fixup_map1++; } while (*fixup_map1); } if (*fixup_map2) { - *fd_f2 = create_map(sizeof(struct test_val), 1); + map_fds[1] = create_map(sizeof(struct test_val), 1); do { - prog[*fixup_map2].imm = *fd_f2; + prog[*fixup_map2].imm = map_fds[1]; fixup_map2++; } while (*fixup_map2); } if (*fixup_prog) { - *fd_f3 = create_prog_array(); + map_fds[2] = create_prog_array(); do { - prog[*fixup_prog].imm = *fd_f3; + prog[*fixup_prog].imm = map_fds[2]; fixup_prog++; } while (*fixup_prog); } + + if (*fixup_map_in_map) { + map_fds[3] = create_map_in_map(); + do { + prog[*fixup_map_in_map].imm = map_fds[3]; + fixup_map_in_map++; + } while (*fixup_map_in_map); + } } static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { + int fd_prog, expected_ret, reject_from_alignment; struct bpf_insn *prog = test->insns; int prog_len = probe_filter_length(prog); int prog_type = test->prog_type; - int fd_f1 = -1, fd_f2 = -1, fd_f3 = -1; - int fd_prog, expected_ret; + int map_fds[MAX_NR_MAPS]; const char *expected_err; + int i; - do_test_fixup(test, prog, &fd_f1, &fd_f2, &fd_f3); + for (i = 0; i < MAX_NR_MAPS; i++) + map_fds[i] = -1; + + do_test_fixup(test, prog, map_fds); fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, prog, prog_len, "GPL", 0, bpf_vlog, @@ -4545,8 +5067,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, test->result_unpriv : test->result; expected_err = unpriv && test->errstr_unpriv ? test->errstr_unpriv : test->errstr; + + reject_from_alignment = fd_prog < 0 && + (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && + strstr(bpf_vlog, "Unknown alignment."); +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (reject_from_alignment) { + printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n", + strerror(errno)); + goto fail_log; + } +#endif if (expected_ret == ACCEPT) { - if (fd_prog < 0) { + if (fd_prog < 0 && !reject_from_alignment) { printf("FAIL\nFailed to load prog '%s'!\n", strerror(errno)); goto fail_log; @@ -4556,19 +5089,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, printf("FAIL\nUnexpected success to load!\n"); goto fail_log; } - if (!strstr(bpf_vlog, expected_err)) { + if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) { printf("FAIL\nUnexpected error message!\n"); goto fail_log; } } (*passes)++; - printf("OK\n"); + printf("OK%s\n", reject_from_alignment ? + " (NOTE: reject due to unknown alignment)" : ""); close_fds: close(fd_prog); - close(fd_f1); - close(fd_f2); - close(fd_f3); + for (i = 0; i < MAX_NR_MAPS; i++) + close(map_fds[i]); sched_yield(); return; fail_log: @@ -4583,10 +5116,12 @@ static bool is_admin(void) cap_flag_value_t sysadmin = CAP_CLEAR; const cap_value_t cap_val = CAP_SYS_ADMIN; +#ifdef CAP_IS_SUPPORTED if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) { perror("cap_get_flag"); return false; } +#endif caps = cap_get_proc(); if (!caps) { perror("cap_get_proc"); diff --git a/tools/testing/selftests/bpf/test_xdp.c b/tools/testing/selftests/bpf/test_xdp.c new file mode 100644 index 000000000000..5e7df8bb5b5d --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp.c @@ -0,0 +1,235 @@ +/* Copyright (c) 2016,2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/pkt_cls.h> +#include <sys/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_iptunnel_common.h" + +int _version SEC("version") = 1; + +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 256, +}; + +struct bpf_map_def SEC("maps") vip2tnl = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct iptnl_info), + .max_entries = MAX_IPTNL_ENTRIES, +}; + +static __always_inline void count_tx(__u32 protocol) +{ + __u64 *rxcnt_count; + + rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol); + if (rxcnt_count) + *rxcnt_count += 1; +} + +static __always_inline int get_dport(void *trans_data, void *data_end, + __u8 protocol) +{ + struct tcphdr *th; + struct udphdr *uh; + + switch (protocol) { + case IPPROTO_TCP: + th = (struct tcphdr *)trans_data; + if (th + 1 > data_end) + return -1; + return th->dest; + case IPPROTO_UDP: + uh = (struct udphdr *)trans_data; + if (uh + 1 > data_end) + return -1; + return uh->dest; + default: + return 0; + } +} + +static __always_inline void set_ethhdr(struct ethhdr *new_eth, + const struct ethhdr *old_eth, + const struct iptnl_info *tnl, + __be16 h_proto) +{ + memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source)); + memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest)); + new_eth->h_proto = h_proto; +} + +static __always_inline int handle_ipv4(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + struct iphdr *iph = data + sizeof(struct ethhdr); + __u16 *next_iph; + __u16 payload_len; + struct vip vip = {}; + int dport; + __u32 csum = 0; + int i; + + if (iph + 1 > data_end) + return XDP_DROP; + + dport = get_dport(iph + 1, data_end, iph->protocol); + if (dport == -1) + return XDP_DROP; + + vip.protocol = iph->protocol; + vip.family = AF_INET; + vip.daddr.v4 = iph->daddr; + vip.dport = dport; + payload_len = bpf_ntohs(iph->tot_len); + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v4-in-v4 */ + if (!tnl || tnl->family != AF_INET) + return XDP_PASS; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) + return XDP_DROP; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + + new_eth = data; + iph = data + sizeof(*new_eth); + old_eth = data + sizeof(*iph); + + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || + iph + 1 > data_end) + return XDP_DROP; + + set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP)); + + iph->version = 4; + iph->ihl = sizeof(*iph) >> 2; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 0; + iph->tot_len = bpf_htons(payload_len + sizeof(*iph)); + iph->daddr = tnl->daddr.v4; + iph->saddr = tnl->saddr.v4; + iph->ttl = 8; + + next_iph = (__u16 *)iph; +#pragma clang loop unroll(full) + for (i = 0; i < sizeof(*iph) >> 1; i++) + csum += *next_iph++; + + iph->check = ~((csum & 0xffff) + (csum >> 16)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +static __always_inline int handle_ipv6(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + struct ipv6hdr *ip6h = data + sizeof(struct ethhdr); + __u16 payload_len; + struct vip vip = {}; + int dport; + + if (ip6h + 1 > data_end) + return XDP_DROP; + + dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr); + if (dport == -1) + return XDP_DROP; + + vip.protocol = ip6h->nexthdr; + vip.family = AF_INET6; + memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr)); + vip.dport = dport; + payload_len = ip6h->payload_len; + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v6-in-v6 */ + if (!tnl || tnl->family != AF_INET6) + return XDP_PASS; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) + return XDP_DROP; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + + new_eth = data; + ip6h = data + sizeof(*new_eth); + old_eth = data + sizeof(*ip6h); + + if (new_eth + 1 > data_end || old_eth + 1 > data_end || + ip6h + 1 > data_end) + return XDP_DROP; + + set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6)); + + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h)); + ip6h->nexthdr = IPPROTO_IPV6; + ip6h->hop_limit = 8; + memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6)); + memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +SEC("xdp_tx_iptunnel") +int _xdp_tx_iptunnel(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eth = data; + __u16 h_proto; + + if (eth + 1 > data_end) + return XDP_DROP; + + h_proto = eth->h_proto; + + if (h_proto == bpf_htons(ETH_P_IP)) + return handle_ipv4(xdp); + else if (h_proto == bpf_htons(ETH_P_IPV6)) + + return handle_ipv6(xdp); + else + return XDP_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/drivers/gpu/i915.sh b/tools/testing/selftests/drivers/gpu/i915.sh index d407f0fa1e3a..c06d6e8a8dcc 100755 --- a/tools/testing/selftests/drivers/gpu/i915.sh +++ b/tools/testing/selftests/drivers/gpu/i915.sh @@ -7,6 +7,7 @@ if ! /sbin/modprobe -q -r i915; then fi if /sbin/modprobe -q i915 mock_selftests=-1; then + /sbin/modprobe -q -r i915 echo "drivers/gpu/i915: ok" else echo "drivers/gpu/i915: [FAIL]" diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 52e3c4df28d6..32e6211e1c6e 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -16,6 +16,7 @@ echo " -k|--keep Keep passed test logs" echo " -v|--verbose Increase verbosity of test messages" echo " -vv Alias of -v -v (Show all results in stdout)" echo " -d|--debug Debug mode (trace all shell commands)" +echo " -l|--logdir <dir> Save logs on the <dir>" exit $1 } @@ -64,6 +65,10 @@ parse_opts() { # opts DEBUG=1 shift 1 ;; + --logdir|-l) + LOG_DIR=$2 + shift 2 + ;; *.tc) if [ -f "$1" ]; then OPT_TEST_CASES="$OPT_TEST_CASES `abspath $1`" @@ -145,11 +150,16 @@ XFAILED_CASES= UNDEFINED_CASES= TOTAL_RESULT=0 +INSTANCE= CASENO=0 testcase() { # testfile CASENO=$((CASENO+1)) desc=`grep "^#[ \t]*description:" $1 | cut -f2 -d:` - prlog -n "[$CASENO]$desc" + prlog -n "[$CASENO]$INSTANCE$desc" +} + +test_on_instance() { # testfile + grep -q "^#[ \t]*flags:.*instance" $1 } eval_result() { # sigval @@ -266,6 +276,17 @@ for t in $TEST_CASES; do run_test $t done +# Test on instance loop +INSTANCE=" (instance) " +for t in $TEST_CASES; do + test_on_instance $t || continue + SAVED_TRACING_DIR=$TRACING_DIR + export TRACING_DIR=`mktemp -d $TRACING_DIR/instances/ftracetest.XXXXXX` + run_test $t + rmdir $TRACING_DIR + TRACING_DIR=$SAVED_TRACING_DIR +done + prlog "" prlog "# of passed: " `echo $PASSED_CASES | wc -w` prlog "# of failed: " `echo $FAILED_CASES | wc -w` diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc index bf9a7b037924..ebfce83f35b4 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: Basic test for tracers +# flags: instance test -f available_tracers for t in `cat available_tracers`; do echo $t > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc index bde6625d9785..9e33f841812f 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: Basic trace clock test +# flags: instance test -f trace_clock for c in `cat trace_clock | tr -d \[\]`; do echo $c > trace_clock diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc index 87eb9d6dd4ca..283b45ecb199 100644 --- a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event tracing - enable/disable with event level files +# flags: instance do_reset() { echo > set_event diff --git a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc index d4ab27b522f8..96c1a95be4f7 100644 --- a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event tracing - restricts events based on pid +# flags: instance do_reset() { echo > set_event diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index ced27ef0638f..b8fe2e5b9e67 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event tracing - enable/disable with subsystem level files +# flags: instance do_reset() { echo > set_event diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc new file mode 100644 index 000000000000..bab5ff7c607e --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -0,0 +1,117 @@ +#!/bin/sh +# description: ftrace - function pid filters + +# Make sure that function pid matching filter works. +# Also test it on an instance directory + +if ! grep -q function available_tracers; then + echo "no function tracer configured" + exit_unsupported +fi + +if [ ! -f set_ftrace_pid ]; then + echo "set_ftrace_pid not found? Is function tracer not set?" + exit_unsupported +fi + +if [ ! -f set_ftrace_filter ]; then + echo "set_ftrace_filter not found? Is function tracer not set?" + exit_unsupported +fi + +do_function_fork=1 + +if [ ! -f options/function-fork ]; then + do_function_fork=0 + echo "no option for function-fork found. Option will not be tested." +fi + +read PID _ < /proc/self/stat + +if [ $do_function_fork -eq 1 ]; then + # default value of function-fork option + orig_value=`grep function-fork trace_options` +fi + +do_reset() { + reset_tracer + clear_trace + enable_tracing + echo > set_ftrace_filter + echo > set_ftrace_pid + + if [ $do_function_fork -eq 0 ]; then + return + fi + + echo $orig_value > trace_options +} + +fail() { # msg + do_reset + echo $1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 +} + +do_test() { + disable_tracing + + echo do_execve* > set_ftrace_filter + echo *do_fork >> set_ftrace_filter + + echo $PID > set_ftrace_pid + echo function > current_tracer + + if [ $do_function_fork -eq 1 ]; then + # don't allow children to be traced + echo nofunction-fork > trace_options + fi + + enable_tracing + yield + + count_pid=`cat trace | grep -v ^# | grep $PID | wc -l` + count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l` + + # count_other should be 0 + if [ $count_pid -eq 0 -o $count_other -ne 0 ]; then + fail "PID filtering not working?" + fi + + disable_tracing + clear_trace + + if [ $do_function_fork -eq 0 ]; then + return + fi + + # allow children to be traced + echo function-fork > trace_options + + enable_tracing + yield + + count_pid=`cat trace | grep -v ^# | grep $PID | wc -l` + count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l` + + # count_other should NOT be 0 + if [ $count_pid -eq 0 -o $count_other -eq 0 ]; then + fail "PID filtering not following fork?" + fi +} + +do_test + +mkdir instances/foo +cd instances/foo +do_test +cd ../../ +rmdir instances/foo + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc new file mode 100644 index 000000000000..07bb3e5930b4 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc @@ -0,0 +1,114 @@ +#!/bin/sh +# description: ftrace - test for function event triggers +# flags: instance +# +# Ftrace allows to add triggers to functions, such as enabling or disabling +# tracing, enabling or disabling trace events, or recording a stack trace +# within the ring buffer. +# +# This test is designed to test event triggers +# + +# The triggers are set within the set_ftrace_filter file +if [ ! -f set_ftrace_filter ]; then + echo "set_ftrace_filter not found? Is dynamic ftrace not set?" + exit_unsupported +fi + +do_reset() { + reset_ftrace_filter + reset_tracer + disable_events + clear_trace + enable_tracing +} + +fail() { # mesg + do_reset + echo $1 + exit $FAIL +} + +SLEEP_TIME=".1" + +do_reset + +echo "Testing function probes with events:" + +EVENT="sched:sched_switch" +EVENT_ENABLE="events/sched/sched_switch/enable" + +cnt_trace() { + grep -v '^#' trace | wc -l +} + +test_event_enabled() { + val=$1 + + e=`cat $EVENT_ENABLE` + if [ "$e" != $val ]; then + echo "Expected $val but found $e" + exit -1 + fi +} + +run_enable_disable() { + enable=$1 # enable + Enable=$2 # Enable + check_disable=$3 # 0 + check_enable_star=$4 # 1* + check_disable_star=$5 # 0* + + cnt=`cnt_trace` + if [ $cnt -ne 0 ]; then + fail "Found junk in trace file" + fi + + echo "$Enable event all the time" + + echo $check_disable > $EVENT_ENABLE + sleep $SLEEP_TIME + + test_event_enabled $check_disable + + echo "schedule:${enable}_event:$EVENT" > set_ftrace_filter + + echo " make sure it works 5 times" + + for i in `seq 5`; do + sleep $SLEEP_TIME + echo " test $i" + test_event_enabled $check_enable_star + + echo $check_disable > $EVENT_ENABLE + done + sleep $SLEEP_TIME + echo " make sure it's still works" + test_event_enabled $check_enable_star + + reset_ftrace_filter + + echo " make sure it only works 3 times" + + echo $check_disable > $EVENT_ENABLE + sleep $SLEEP_TIME + + echo "schedule:${enable}_event:$EVENT:3" > set_ftrace_filter + + for i in `seq 3`; do + sleep $SLEEP_TIME + echo " test $i" + test_event_enabled $check_enable_star + + echo $check_disable > $EVENT_ENABLE + done + + sleep $SLEEP_TIME + echo " make sure it stop working" + test_event_enabled $check_disable_star + + do_reset +} + +run_enable_disable enable Enable 0 "1*" "0*" +run_enable_disable disable Disable 1 "0*" "1*" diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc new file mode 100644 index 000000000000..113b4d9bc733 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -0,0 +1,132 @@ +#!/bin/sh +# description: ftrace - test reading of set_ftrace_filter +# +# The set_ftrace_filter file of ftrace is used to list functions as well as +# triggers (probes) attached to functions. The code to read this file is not +# straight forward and has had various bugs in the past. This test is designed +# to add functions and triggers to that file in various ways and read that +# file in various ways (cat vs dd). +# + +# The triggers are set within the set_ftrace_filter file +if [ ! -f set_ftrace_filter ]; then + echo "set_ftrace_filter not found? Is dynamic ftrace not set?" + exit_unsupported +fi + +do_reset() { + reset_tracer + reset_ftrace_filter + disable_events + clear_trace + enable_tracing +} + +fail() { # mesg + do_reset + echo $1 + exit $FAIL +} + +do_reset + +FILTER=set_ftrace_filter +FUNC1="schedule" +FUNC2="do_IRQ" + +ALL_FUNCS="#### all functions enabled ####" + +test_func() { + if ! echo "$1" | grep -q "^$2\$"; then + return 0 + fi + echo "$1" | grep -v "^$2\$" + return 1 +} + +check_set_ftrace_filter() { + cat=`cat $FILTER` + dd1=`dd if=$FILTER bs=1 | grep -v -e 'records in' -e 'records out' -e 'bytes copied'` + dd100=`dd if=$FILTER bs=100 | grep -v -e 'records in' -e 'records out' -e 'bytes copied'` + + echo "Testing '$@'" + + while [ $# -gt 0 ]; do + echo "test $1" + if cat=`test_func "$cat" "$1"`; then + return 0 + fi + if dd1=`test_func "$dd1" "$1"`; then + return 0 + fi + if dd100=`test_func "$dd100" "$1"`; then + return 0 + fi + shift + done + + if [ -n "$cat" ]; then + return 0 + fi + if [ -n "$dd1" ]; then + return 0 + fi + if [ -n "$dd100" ]; then + return 0 + fi + return 1; +} + +if check_set_ftrace_filter "$ALL_FUNCS"; then + fail "Expected only $ALL_FUNCS" +fi + +echo "$FUNC1:traceoff" > set_ftrace_filter +if check_set_ftrace_filter "$ALL_FUNCS" "$FUNC1:traceoff:unlimited"; then + fail "Expected $ALL_FUNCS and $FUNC1:traceoff:unlimited" +fi + +echo "$FUNC1" > set_ftrace_filter +if check_set_ftrace_filter "$FUNC1" "$FUNC1:traceoff:unlimited"; then + fail "Expected $FUNC1 and $FUNC1:traceoff:unlimited" +fi + +echo "$FUNC2" >> set_ftrace_filter +if check_set_ftrace_filter "$FUNC1" "$FUNC2" "$FUNC1:traceoff:unlimited"; then + fail "Expected $FUNC1 $FUNC2 and $FUNC1:traceoff:unlimited" +fi + +echo "$FUNC2:traceoff" >> set_ftrace_filter +if check_set_ftrace_filter "$FUNC1" "$FUNC2" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then + fail "Expected $FUNC1 $FUNC2 $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited" +fi + +echo "$FUNC1" > set_ftrace_filter +if check_set_ftrace_filter "$FUNC1" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then + fail "Expected $FUNC1 $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited" +fi + +echo > set_ftrace_filter +if check_set_ftrace_filter "$ALL_FUNCS" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then + fail "Expected $ALL_FUNCS $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited" +fi + +reset_ftrace_filter + +if check_set_ftrace_filter "$ALL_FUNCS"; then + fail "Expected $ALL_FUNCS" +fi + +echo "$FUNC1" > set_ftrace_filter +if check_set_ftrace_filter "$FUNC1" ; then + fail "Expected $FUNC1" +fi + +echo "$FUNC2" >> set_ftrace_filter +if check_set_ftrace_filter "$FUNC1" "$FUNC2" ; then + fail "Expected $FUNC1 and $FUNC2" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc new file mode 100644 index 000000000000..c8e02ec01eaf --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc @@ -0,0 +1,172 @@ +#!/bin/sh +# description: ftrace - test for function traceon/off triggers +# flags: instance +# +# Ftrace allows to add triggers to functions, such as enabling or disabling +# tracing, enabling or disabling trace events, or recording a stack trace +# within the ring buffer. +# +# This test is designed to test enabling and disabling tracing triggers +# + +# The triggers are set within the set_ftrace_filter file +if [ ! -f set_ftrace_filter ]; then + echo "set_ftrace_filter not found? Is dynamic ftrace not set?" + exit_unsupported +fi + +do_reset() { + reset_ftrace_filter + reset_tracer + disable_events + clear_trace + enable_tracing +} + +fail() { # mesg + do_reset + echo $1 + exit $FAIL +} + +SLEEP_TIME=".1" + +do_reset + +echo "Testing function probes with enabling disabling tracing:" + +cnt_trace() { + grep -v '^#' trace | wc -l +} + +echo '** DISABLE TRACING' +disable_tracing +clear_trace + +cnt=`cnt_trace` +if [ $cnt -ne 0 ]; then + fail "Found junk in trace" +fi + + +echo '** ENABLE EVENTS' + +echo 1 > events/enable + +echo '** ENABLE TRACING' +enable_tracing + +cnt=`cnt_trace` +if [ $cnt -eq 0 ]; then + fail "Nothing found in trace" +fi + +# powerpc uses .schedule +func="schedule" +x=`grep '^\.schedule$' available_filter_functions | wc -l` +if [ "$x" -eq 1 ]; then + func=".schedule" +fi + +echo '** SET TRACEOFF' + +echo "$func:traceoff" > set_ftrace_filter + +cnt=`grep schedule set_ftrace_filter | wc -l` +if [ $cnt -ne 1 ]; then + fail "Did not find traceoff trigger" +fi + +cnt=`cnt_trace` +sleep $SLEEP_TIME +cnt2=`cnt_trace` + +if [ $cnt -ne $cnt2 ]; then + fail "Tracing is not stopped" +fi + +on=`cat tracing_on` +if [ $on != "0" ]; then + fail "Tracing is not off" +fi + +line1=`cat trace | tail -1` +sleep $SLEEP_TIME +line2=`cat trace | tail -1` + +if [ "$line1" != "$line2" ]; then + fail "Tracing file is still changing" +fi + +clear_trace + +cnt=`cnt_trace` +if [ $cnt -ne 0 ]; then + fail "Tracing is still happeing" +fi + +echo "!$func:traceoff" >> set_ftrace_filter + +cnt=`grep schedule set_ftrace_filter | wc -l` +if [ $cnt -ne 0 ]; then + fail "traceoff trigger still exists" +fi + +on=`cat tracing_on` +if [ $on != "0" ]; then + fail "Tracing is started again" +fi + +echo "$func:traceon" > set_ftrace_filter + +cnt=`grep schedule set_ftrace_filter | wc -l` +if [ $cnt -ne 1 ]; then + fail "traceon trigger not found" +fi + +cnt=`cnt_trace` +if [ $cnt -eq 0 ]; then + fail "Tracing did not start" +fi + +on=`cat tracing_on` +if [ $on != "1" ]; then + fail "Tracing was not enabled" +fi + + +echo "!$func:traceon" >> set_ftrace_filter + +cnt=`grep schedule set_ftrace_filter | wc -l` +if [ $cnt -ne 0 ]; then + fail "traceon trigger still exists" +fi + +check_sleep() { + val=$1 + sleep $SLEEP_TIME + cat set_ftrace_filter + on=`cat tracing_on` + if [ $on != "$val" ]; then + fail "Expected tracing_on to be $val, but it was $on" + fi +} + + +echo "$func:traceoff:3" > set_ftrace_filter +check_sleep "0" +echo 1 > tracing_on +check_sleep "0" +echo 1 > tracing_on +check_sleep "0" +echo 1 > tracing_on +check_sleep "1" +echo "!$func:traceoff:0" > set_ftrace_filter + +if grep -e traceon -e traceoff set_ftrace_filter; then + fail "Tracing on and off triggers still exist" +fi + +disable_events + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 91de1a8e4f19..9aec6fcb7729 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -30,6 +30,27 @@ reset_events_filter() { # reset all current setting filters done } +reset_ftrace_filter() { # reset all triggers in set_ftrace_filter + echo > set_ftrace_filter + grep -v '^#' set_ftrace_filter | while read t; do + tr=`echo $t | cut -d: -f2` + if [ "$tr" == "" ]; then + continue + fi + if [ $tr == "enable_event" -o $tr == "disable_event" ]; then + tr=`echo $t | cut -d: -f1-4` + limit=`echo $t | cut -d: -f5` + else + tr=`echo $t | cut -d: -f1-2` + limit=`echo $t | cut -d: -f3` + fi + if [ "$limit" != "unlimited" ]; then + tr="$tr:$limit" + fi + echo "!$tr" > set_ftrace_filter + done +} + disable_events() { echo 0 > events/enable } diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc new file mode 100644 index 000000000000..57abdf1caabf --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc @@ -0,0 +1,39 @@ +#!/bin/sh +# description: Kretprobe dynamic event with maxactive + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +echo > kprobe_events + +# Test if we successfully reject unknown messages +if echo 'a:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi + +# Test if we successfully reject too big maxactive +if echo 'r1000000:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi + +# Test if we successfully reject unparsable numbers for maxactive +if echo 'r10fuzz:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi + +# Test for kretprobe with event name without maxactive +echo 'r:myprobeaccept inet_csk_accept' > kprobe_events +grep myprobeaccept kprobe_events +test -d events/kprobes/myprobeaccept +echo '-:myprobeaccept' >> kprobe_events + +# Test for kretprobe with event name with a small maxactive +echo 'r10:myprobeaccept inet_csk_accept' > kprobe_events +grep myprobeaccept kprobe_events +test -d events/kprobes/myprobeaccept +echo '-:myprobeaccept' >> kprobe_events + +# Test for kretprobe without event name without maxactive +echo 'r inet_csk_accept' > kprobe_events +grep inet_csk_accept kprobe_events +echo > kprobe_events + +# Test for kretprobe without event name with a small maxactive +echo 'r10 inet_csk_accept' > kprobe_events +grep inet_csk_accept kprobe_events +echo > kprobe_events + +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc index 1a9445021bf1..c5435adfdd93 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event trigger - test event enable/disable trigger +# flags: instance do_reset() { reset_trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc index 514e466e198b..48849a8d577f 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event trigger - test trigger filter +# flags: instance do_reset() { reset_trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc index 400e98b64948..b7f86d10b549 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event trigger - test histogram modifiers +# flags: instance do_reset() { reset_trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc index a00184cd9c95..fb66f7d9339d 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event trigger - test histogram trigger +# flags: instance do_reset() { reset_trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc index 3478b00ead57..f9153087dd7c 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event trigger - test multiple histogram triggers +# flags: instance do_reset() { reset_trigger diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index fbfe5d0d5c2e..35cbb4cba410 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,7 +5,7 @@ CFLAGS += -I../../../../usr/include/ reuseport_bpf_numa: LDFLAGS += -lnuma -TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh +TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh new file mode 100755 index 000000000000..4e00568d70c2 --- /dev/null +++ b/tools/testing/selftests/net/netdevice.sh @@ -0,0 +1,200 @@ +#!/bin/sh +# +# This test is for checking network interface +# For the moment it tests only ethernet interface (but wifi could be easily added) +# +# We assume that all network driver are loaded +# if not they probably have failed earlier in the boot process and their logged error will be catched by another test +# + +# this function will try to up the interface +# if already up, nothing done +# arg1: network interface name +kci_net_start() +{ + netdev=$1 + + ip link show "$netdev" |grep -q UP + if [ $? -eq 0 ];then + echo "SKIP: $netdev: interface already up" + return 0 + fi + + ip link set "$netdev" up + if [ $? -ne 0 ];then + echo "FAIL: $netdev: Fail to up interface" + return 1 + else + echo "PASS: $netdev: set interface up" + NETDEV_STARTED=1 + fi + return 0 +} + +# this function will try to setup an IP and MAC address on a network interface +# Doing nothing if the interface was already up +# arg1: network interface name +kci_net_setup() +{ + netdev=$1 + + # do nothing if the interface was already up + if [ $NETDEV_STARTED -eq 0 ];then + return 0 + fi + + MACADDR='02:03:04:05:06:07' + ip link set dev $netdev address "$MACADDR" + if [ $? -ne 0 ];then + echo "FAIL: $netdev: Cannot set MAC address" + else + ip link show $netdev |grep -q "$MACADDR" + if [ $? -eq 0 ];then + echo "PASS: $netdev: set MAC address" + else + echo "FAIL: $netdev: Cannot set MAC address" + fi + fi + + #check that the interface did not already have an IP + ip address show "$netdev" |grep '^[[:space:]]*inet' + if [ $? -eq 0 ];then + echo "SKIP: $netdev: already have an IP" + return 0 + fi + + # TODO what ipaddr to set ? DHCP ? + echo "SKIP: $netdev: set IP address" + return 0 +} + +# test an ethtool command +# arg1: return code for not supported (see ethtool code source) +# arg2: summary of the command +# arg3: command to execute +kci_netdev_ethtool_test() +{ + if [ $# -le 2 ];then + echo "SKIP: $netdev: ethtool: invalid number of arguments" + return 1 + fi + $3 >/dev/null + ret=$? + if [ $ret -ne 0 ];then + if [ $ret -eq "$1" ];then + echo "SKIP: $netdev: ethtool $2 not supported" + else + echo "FAIL: $netdev: ethtool $2" + return 1 + fi + else + echo "PASS: $netdev: ethtool $2" + fi + return 0 +} + +# test ethtool commands +# arg1: network interface name +kci_netdev_ethtool() +{ + netdev=$1 + + #check presence of ethtool + ethtool --version 2>/dev/null >/dev/null + if [ $? -ne 0 ];then + echo "SKIP: ethtool not present" + return 1 + fi + + TMP_ETHTOOL_FEATURES="$(mktemp)" + if [ ! -e "$TMP_ETHTOOL_FEATURES" ];then + echo "SKIP: Cannot create a tmp file" + return 1 + fi + + ethtool -k "$netdev" > "$TMP_ETHTOOL_FEATURES" + if [ $? -ne 0 ];then + echo "FAIL: $netdev: ethtool list features" + rm "$TMP_ETHTOOL_FEATURES" + return 1 + fi + echo "PASS: $netdev: ethtool list features" + #TODO for each non fixed features, try to turn them on/off + rm "$TMP_ETHTOOL_FEATURES" + + kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev" + kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev" + return 0 +} + +# stop a netdev +# arg1: network interface name +kci_netdev_stop() +{ + netdev=$1 + + if [ $NETDEV_STARTED -eq 0 ];then + echo "SKIP: $netdev: interface kept up" + return 0 + fi + + ip link set "$netdev" down + if [ $? -ne 0 ];then + echo "FAIL: $netdev: stop interface" + return 1 + fi + echo "PASS: $netdev: stop interface" + return 0 +} + +# run all test on a netdev +# arg1: network interface name +kci_test_netdev() +{ + NETDEV_STARTED=0 + IFACE_TO_UPDOWN="$1" + IFACE_TO_TEST="$1" + #check for VLAN interface + MASTER_IFACE="$(echo $1 | cut -d@ -f2)" + if [ ! -z "$MASTER_IFACE" ];then + IFACE_TO_UPDOWN="$MASTER_IFACE" + IFACE_TO_TEST="$(echo $1 | cut -d@ -f1)" + fi + + NETDEV_STARTED=0 + kci_net_start "$IFACE_TO_UPDOWN" + + kci_net_setup "$IFACE_TO_TEST" + + kci_netdev_ethtool "$IFACE_TO_TEST" + + kci_netdev_stop "$IFACE_TO_UPDOWN" + return 0 +} + +#check for needed privileges +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +ip -Version 2>/dev/null >/dev/null +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without the ip tool" + exit 0 +fi + +TMP_LIST_NETDEV="$(mktemp)" +if [ ! -e "$TMP_LIST_NETDEV" ];then + echo "FAIL: Cannot create a tmp file" + exit 1 +fi + +ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\ -f2 | cut -d: -f1> "$TMP_LIST_NETDEV" +while read netdev +do + kci_test_netdev "$netdev" +done < "$TMP_LIST_NETDEV" + +rm "$TMP_LIST_NETDEV" +exit 0 diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 412459369686..989f917068d1 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -71,18 +71,17 @@ /* Open a socket in a given fanout mode. * @return -1 if mode is bad, a valid socket otherwise */ -static int sock_fanout_open(uint16_t typeflags, int num_packets) +static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) { int fd, val; - fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP)); + fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP)); if (fd < 0) { perror("socket packet"); exit(1); } - /* fanout group ID is always 0: tests whether old groups are deleted */ - val = ((int) typeflags) << 16; + val = (((int) typeflags) << 16) | group_id; if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) { if (close(fd)) { perror("close packet"); @@ -95,6 +94,38 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets) return fd; } +static void sock_fanout_set_cbpf(int fd) +{ + struct sock_filter bpf_filter[] = { + BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80), /* ldb [80] */ + BPF_STMT(BPF_RET+BPF_A, 0), /* ret A */ + }; + struct sock_fprog bpf_prog; + + bpf_prog.filter = bpf_filter; + bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); + + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog, + sizeof(bpf_prog))) { + perror("fanout data cbpf"); + exit(1); + } +} + +static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id) +{ + int sockopt; + socklen_t sockopt_len = sizeof(sockopt); + + if (getsockopt(fd, SOL_PACKET, PACKET_FANOUT, + &sockopt, &sockopt_len)) { + perror("failed to getsockopt"); + exit(1); + } + *typeflags = sockopt >> 16; + *group_id = sockopt & 0xfffff; +} + static void sock_fanout_set_ebpf(int fd) { const int len_off = __builtin_offsetof(struct __sk_buff, len); @@ -223,26 +254,26 @@ static void test_control_group(void) fprintf(stderr, "test: control multiple sockets\n"); - fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 20); + fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0); if (fds[0] == -1) { fprintf(stderr, "ERROR: failed to open HASH socket\n"); exit(1); } if (sock_fanout_open(PACKET_FANOUT_HASH | - PACKET_FANOUT_FLAG_DEFRAG, 10) != -1) { + PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) { fprintf(stderr, "ERROR: joined group with wrong flag defrag\n"); exit(1); } if (sock_fanout_open(PACKET_FANOUT_HASH | - PACKET_FANOUT_FLAG_ROLLOVER, 10) != -1) { + PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) { fprintf(stderr, "ERROR: joined group with wrong flag ro\n"); exit(1); } - if (sock_fanout_open(PACKET_FANOUT_CPU, 10) != -1) { + if (sock_fanout_open(PACKET_FANOUT_CPU, 0) != -1) { fprintf(stderr, "ERROR: joined group with wrong mode\n"); exit(1); } - fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 20); + fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0); if (fds[1] == -1) { fprintf(stderr, "ERROR: failed to join group\n"); exit(1); @@ -253,6 +284,61 @@ static void test_control_group(void) } } +/* Test creating a unique fanout group ids */ +static void test_unique_fanout_group_ids(void) +{ + int fds[3]; + uint16_t typeflags, first_group_id, second_group_id; + + fprintf(stderr, "test: unique ids\n"); + + fds[0] = sock_fanout_open(PACKET_FANOUT_HASH | + PACKET_FANOUT_FLAG_UNIQUEID, 0); + if (fds[0] == -1) { + fprintf(stderr, "ERROR: failed to create a unique id group.\n"); + exit(1); + } + + sock_fanout_getopts(fds[0], &typeflags, &first_group_id); + if (typeflags != PACKET_FANOUT_HASH) { + fprintf(stderr, "ERROR: unexpected typeflags %x\n", typeflags); + exit(1); + } + + if (sock_fanout_open(PACKET_FANOUT_CPU, first_group_id) != -1) { + fprintf(stderr, "ERROR: joined group with wrong type.\n"); + exit(1); + } + + fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, first_group_id); + if (fds[1] == -1) { + fprintf(stderr, + "ERROR: failed to join previously created group.\n"); + exit(1); + } + + fds[2] = sock_fanout_open(PACKET_FANOUT_HASH | + PACKET_FANOUT_FLAG_UNIQUEID, 0); + if (fds[2] == -1) { + fprintf(stderr, + "ERROR: failed to create a second unique id group.\n"); + exit(1); + } + + sock_fanout_getopts(fds[2], &typeflags, &second_group_id); + if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_UNIQUEID, + second_group_id) != -1) { + fprintf(stderr, + "ERROR: specified a group id when requesting unique id\n"); + exit(1); + } + + if (close(fds[0]) || close(fds[1]) || close(fds[2])) { + fprintf(stderr, "ERROR: closing sockets\n"); + exit(1); + } +} + static int test_datapath(uint16_t typeflags, int port_off, const int expect1[], const int expect2[]) { @@ -263,14 +349,14 @@ static int test_datapath(uint16_t typeflags, int port_off, fprintf(stderr, "test: datapath 0x%hx\n", typeflags); - fds[0] = sock_fanout_open(typeflags, 20); - fds[1] = sock_fanout_open(typeflags, 20); + fds[0] = sock_fanout_open(typeflags, 0); + fds[1] = sock_fanout_open(typeflags, 0); if (fds[0] == -1 || fds[1] == -1) { fprintf(stderr, "ERROR: failed open\n"); exit(1); } if (type == PACKET_FANOUT_CBPF) - sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA); + sock_fanout_set_cbpf(fds[0]); else if (type == PACKET_FANOUT_EBPF) sock_fanout_set_ebpf(fds[0]); @@ -331,10 +417,12 @@ int main(int argc, char **argv) const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } }; const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } }; + const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } }; int port_off = 2, tries = 5, ret; test_control_single(); test_control_group(); + test_unique_fanout_group_ids(); /* find a set of ports that do not collide onto the same socket */ ret = test_datapath(PACKET_FANOUT_HASH, port_off, @@ -365,6 +453,9 @@ int main(int argc, char **argv) ret |= test_datapath(PACKET_FANOUT_CPU, port_off, expect_cpu1[0], expect_cpu1[1]); + ret |= test_datapath(PACKET_FANOUT_FLAG_UNIQUEID, port_off, + expect_uniqueid[0], expect_uniqueid[1]); + if (ret) return 1; diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h index a77da88bf946..7d990d6c861b 100644 --- a/tools/testing/selftests/net/psock_lib.h +++ b/tools/testing/selftests/net/psock_lib.h @@ -38,7 +38,7 @@ # define __maybe_unused __attribute__ ((__unused__)) #endif -static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum) +static __maybe_unused void pair_udp_setfilter(int fd) { /* the filter below checks for all of the following conditions that * are based on the contents of create_payload() @@ -76,23 +76,16 @@ static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum) }; struct sock_fprog bpf_prog; - if (lvl == SOL_PACKET && optnum == PACKET_FANOUT_DATA) - bpf_filter[5].code = 0x16; /* RET A */ - bpf_prog.filter = bpf_filter; bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); - if (setsockopt(fd, lvl, optnum, &bpf_prog, + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog, sizeof(bpf_prog))) { perror("setsockopt SO_ATTACH_FILTER"); exit(1); } } -static __maybe_unused void pair_udp_setfilter(int fd) -{ - sock_setfilter(fd, SOL_SOCKET, SO_ATTACH_FILTER); -} - static __maybe_unused void pair_udp_open(int fds[], uint16_t port) { struct sockaddr_in saddr, daddr; diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 8d9fc64c8761..72c3ac2323e1 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -8,12 +8,13 @@ ifeq ($(ARCH),powerpc) GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") -CFLAGS := -std=gnu99 -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR)/include $(CFLAGS) +CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR)/include $(CFLAGS) export CFLAGS SUB_DIRS = alignment \ benchmarks \ + cache_shape \ copyloops \ context_switch \ dscr \ @@ -34,34 +35,34 @@ endif all: $(SUB_DIRS) $(SUB_DIRS): - BUILD_TARGET=$$OUTPUT/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all + BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all include ../lib.mk override define RUN_TESTS @for TARGET in $(SUB_DIRS); do \ - BUILD_TARGET=$$OUTPUT/$$TARGET; \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\ done; endef override define INSTALL_RULE @for TARGET in $(SUB_DIRS); do \ - BUILD_TARGET=$$OUTPUT/$$TARGET; \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\ done; endef override define EMIT_TESTS @for TARGET in $(SUB_DIRS); do \ - BUILD_TARGET=$$OUTPUT/$$TARGET; \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\ done; endef override define CLEAN @for TARGET in $(SUB_DIRS); do \ - BUILD_TARGET=$$OUTPUT/$$TARGET; \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \ done; rm -f tags diff --git a/tools/testing/selftests/powerpc/cache_shape/.gitignore b/tools/testing/selftests/powerpc/cache_shape/.gitignore new file mode 100644 index 000000000000..ec1848434be5 --- /dev/null +++ b/tools/testing/selftests/powerpc/cache_shape/.gitignore @@ -0,0 +1 @@ +cache_shape diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile new file mode 100644 index 000000000000..b24485ab30e2 --- /dev/null +++ b/tools/testing/selftests/powerpc/cache_shape/Makefile @@ -0,0 +1,10 @@ +TEST_PROGS := cache_shape + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c ../utils.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/cache_shape/cache_shape.c b/tools/testing/selftests/powerpc/cache_shape/cache_shape.c new file mode 100644 index 000000000000..29ec07eba7f9 --- /dev/null +++ b/tools/testing/selftests/powerpc/cache_shape/cache_shape.c @@ -0,0 +1,125 @@ +/* + * Copyright 2017, Michael Ellerman, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <elf.h> +#include <errno.h> +#include <fcntl.h> +#include <link.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "utils.h" + +#ifndef AT_L1I_CACHESIZE +#define AT_L1I_CACHESIZE 40 +#define AT_L1I_CACHEGEOMETRY 41 +#define AT_L1D_CACHESIZE 42 +#define AT_L1D_CACHEGEOMETRY 43 +#define AT_L2_CACHESIZE 44 +#define AT_L2_CACHEGEOMETRY 45 +#define AT_L3_CACHESIZE 46 +#define AT_L3_CACHEGEOMETRY 47 +#endif + +static void print_size(const char *label, uint32_t val) +{ + printf("%s cache size: %#10x %10dB %10dK\n", label, val, val, val / 1024); +} + +static void print_geo(const char *label, uint32_t val) +{ + uint16_t assoc; + + printf("%s line size: %#10x ", label, val & 0xFFFF); + + assoc = val >> 16; + if (assoc) + printf("%u-way", assoc); + else + printf("fully"); + + printf(" associative\n"); +} + +static int test_cache_shape() +{ + static char buffer[4096]; + ElfW(auxv_t) *p; + int found; + + FAIL_IF(read_auxv(buffer, sizeof(buffer))); + + found = 0; + + p = find_auxv_entry(AT_L1I_CACHESIZE, buffer); + if (p) { + found++; + print_size("L1I ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L1I_CACHEGEOMETRY, buffer); + if (p) { + found++; + print_geo("L1I ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L1D_CACHESIZE, buffer); + if (p) { + found++; + print_size("L1D ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L1D_CACHEGEOMETRY, buffer); + if (p) { + found++; + print_geo("L1D ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L2_CACHESIZE, buffer); + if (p) { + found++; + print_size("L2 ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L2_CACHEGEOMETRY, buffer); + if (p) { + found++; + print_geo("L2 ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L3_CACHESIZE, buffer); + if (p) { + found++; + print_size("L3 ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L3_CACHEGEOMETRY, buffer); + if (p) { + found++; + print_geo("L3 ", (uint32_t)p->a_un.a_val); + } + + /* If we found none we're probably on a system where they don't exist */ + SKIP_IF(found == 0); + + /* But if we found any, we expect to find them all */ + FAIL_IF(found != 8); + + return 0; +} + +int main(void) +{ + return test_harness(test_cache_shape, "cache_shape"); +} diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 248a820048df..66d31de60b9a 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name) rc = run_test(test_function, name); - if (rc == MAGIC_SKIP_RETURN_VALUE) + if (rc == MAGIC_SKIP_RETURN_VALUE) { test_skip(name); - else + /* so that skipped test is not marked as failed */ + rc = 0; + } else test_finish(name, rc); return rc; diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 53405e8a52ab..735815b3ad7f 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -24,7 +24,11 @@ typedef uint8_t u8; void test_harness_set_timeout(uint64_t time); int test_harness(int (test_function)(void), char *name); -extern void *get_auxv_entry(int type); + +int read_auxv(char *buf, ssize_t buf_size); +void *find_auxv_entry(int type, char *auxv); +void *get_auxv_entry(int type); + int pick_online_cpu(void); static inline bool have_hwcap(unsigned long ftr) diff --git a/tools/testing/selftests/powerpc/include/vsx_asm.h b/tools/testing/selftests/powerpc/include/vsx_asm.h index d828bfb6ef2d..54064ced9e95 100644 --- a/tools/testing/selftests/powerpc/include/vsx_asm.h +++ b/tools/testing/selftests/powerpc/include/vsx_asm.h @@ -16,56 +16,56 @@ */ FUNC_START(load_vsx) li r5,0 - lxvx vs20,r5,r3 + lxvd2x vs20,r5,r3 addi r5,r5,16 - lxvx vs21,r5,r3 + lxvd2x vs21,r5,r3 addi r5,r5,16 - lxvx vs22,r5,r3 + lxvd2x vs22,r5,r3 addi r5,r5,16 - lxvx vs23,r5,r3 + lxvd2x vs23,r5,r3 addi r5,r5,16 - lxvx vs24,r5,r3 + lxvd2x vs24,r5,r3 addi r5,r5,16 - lxvx vs25,r5,r3 + lxvd2x vs25,r5,r3 addi r5,r5,16 - lxvx vs26,r5,r3 + lxvd2x vs26,r5,r3 addi r5,r5,16 - lxvx vs27,r5,r3 + lxvd2x vs27,r5,r3 addi r5,r5,16 - lxvx vs28,r5,r3 + lxvd2x vs28,r5,r3 addi r5,r5,16 - lxvx vs29,r5,r3 + lxvd2x vs29,r5,r3 addi r5,r5,16 - lxvx vs30,r5,r3 + lxvd2x vs30,r5,r3 addi r5,r5,16 - lxvx vs31,r5,r3 + lxvd2x vs31,r5,r3 blr FUNC_END(load_vsx) FUNC_START(store_vsx) li r5,0 - stxvx vs20,r5,r3 + stxvd2x vs20,r5,r3 addi r5,r5,16 - stxvx vs21,r5,r3 + stxvd2x vs21,r5,r3 addi r5,r5,16 - stxvx vs22,r5,r3 + stxvd2x vs22,r5,r3 addi r5,r5,16 - stxvx vs23,r5,r3 + stxvd2x vs23,r5,r3 addi r5,r5,16 - stxvx vs24,r5,r3 + stxvd2x vs24,r5,r3 addi r5,r5,16 - stxvx vs25,r5,r3 + stxvd2x vs25,r5,r3 addi r5,r5,16 - stxvx vs26,r5,r3 + stxvd2x vs26,r5,r3 addi r5,r5,16 - stxvx vs27,r5,r3 + stxvd2x vs27,r5,r3 addi r5,r5,16 - stxvx vs28,r5,r3 + stxvd2x vs28,r5,r3 addi r5,r5,16 - stxvx vs29,r5,r3 + stxvd2x vs29,r5,r3 addi r5,r5,16 - stxvx vs30,r5,r3 + stxvd2x vs30,r5,r3 addi r5,r5,16 - stxvx vs31,r5,r3 + stxvd2x vs31,r5,r3 blr FUNC_END(store_vsx) diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index dcf74184bfd0..d46916867a6f 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -19,45 +19,64 @@ static char auxv[4096]; -void *get_auxv_entry(int type) +int read_auxv(char *buf, ssize_t buf_size) { - ElfW(auxv_t) *p; - void *result; ssize_t num; - int fd; + int rc, fd; fd = open("/proc/self/auxv", O_RDONLY); if (fd == -1) { perror("open"); - return NULL; + return -errno; } - result = NULL; - - num = read(fd, auxv, sizeof(auxv)); + num = read(fd, buf, buf_size); if (num < 0) { perror("read"); + rc = -EIO; goto out; } - if (num > sizeof(auxv)) { - printf("Overflowed auxv buffer\n"); + if (num > buf_size) { + printf("overflowed auxv buffer\n"); + rc = -EOVERFLOW; goto out; } + rc = 0; +out: + close(fd); + return rc; +} + +void *find_auxv_entry(int type, char *auxv) +{ + ElfW(auxv_t) *p; + p = (ElfW(auxv_t) *)auxv; while (p->a_type != AT_NULL) { - if (p->a_type == type) { - result = (void *)p->a_un.a_val; - break; - } + if (p->a_type == type) + return p; p++; } -out: - close(fd); - return result; + + return NULL; +} + +void *get_auxv_entry(int type) +{ + ElfW(auxv_t) *p; + + if (read_auxv(auxv, sizeof(auxv))) + return NULL; + + p = find_auxv_entry(type, auxv); + if (p) + return (void *)p->a_un.a_val; + + return NULL; } int pick_online_cpu(void) diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index b90e50c36f9f..5fa1d7e9a915 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -3,7 +3,7 @@ CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS) LDFLAGS += -lrt -lpthread # these are all "safe" tests that don't modify -# system time or require escalated privledges +# system time or require escalated privileges TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ inconsistency-check raw_skew threadtest rtctest @@ -14,7 +14,7 @@ TEST_GEN_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew include ../lib.mk -# these tests require escalated privledges +# these tests require escalated privileges # and may modify the system time or trigger # other behavior like suspend run_destructive_tests: run_tests diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 4cff7e7ddcc4..cbb29e41ef2b 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,5 +1,9 @@ # Makefile for vm selftests +ifndef OUTPUT + OUTPUT := $(shell pwd) +endif + CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test @@ -11,21 +15,15 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd -TEST_GEN_FILES += userfaultfd_hugetlb -TEST_GEN_FILES += userfaultfd_shmem TEST_GEN_FILES += mlock-random-test +TEST_GEN_FILES += virtual_address_range TEST_PROGS := run_vmtests include ../lib.mk -$(OUTPUT)/userfaultfd: LDLIBS += -lpthread ../../../../usr/include/linux/kernel.h - -$(OUTPUT)/userfaultfd_hugetlb: userfaultfd.c ../../../../usr/include/linux/kernel.h - $(CC) $(CFLAGS) -DHUGETLB_TEST -O2 -o $@ $< -lpthread - -$(OUTPUT)/userfaultfd_shmem: userfaultfd.c ../../../../usr/include/linux/kernel.h - $(CC) $(CFLAGS) -DSHMEM_TEST -O2 -o $@ $< -lpthread +$(OUTPUT)/userfaultfd: ../../../../usr/include/linux/kernel.h +$(OUTPUT)/userfaultfd: LDLIBS += -lpthread $(OUTPUT)/mlock-random-test: LDLIBS += -lcap diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 2ed05ad00daa..07548a1fa901 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -95,7 +95,7 @@ echo " hugetlb regression testing." echo "-------------------" echo "running userfaultfd" echo "-------------------" -./userfaultfd 128 32 +./userfaultfd anon 128 32 if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 @@ -107,7 +107,7 @@ echo "---------------------------" echo "running userfaultfd_hugetlb" echo "---------------------------" # 256MB total huge pages == 128MB src and 128MB dst -./userfaultfd_hugetlb 128 32 $mnt/ufd_test_file +./userfaultfd hugetlb 128 32 $mnt/ufd_test_file if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 @@ -119,7 +119,7 @@ rm -f $mnt/ufd_test_file echo "-------------------------" echo "running userfaultfd_shmem" echo "-------------------------" -./userfaultfd_shmem 128 32 +./userfaultfd shmem 128 32 if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 @@ -165,4 +165,15 @@ else echo "[PASS]" fi +echo "-----------------------------" +echo "running virtual_address_range" +echo "-----------------------------" +./virtual_address_range +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + exit $exitcode diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index e9449c801888..1eae79ae5b4e 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -77,10 +77,13 @@ static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; #define BOUNCE_POLL (1<<3) static int bounces; -#ifdef HUGETLB_TEST +#define TEST_ANON 1 +#define TEST_HUGETLB 2 +#define TEST_SHMEM 3 +static int test_type; + static int huge_fd; static char *huge_fd_off0; -#endif static unsigned long long *count_verify; static int uffd, uffd_flags, finished, *pipefd; static char *area_src, *area_dst; @@ -102,14 +105,7 @@ pthread_attr_t attr; ~(unsigned long)(sizeof(unsigned long long) \ - 1))) -#if !defined(HUGETLB_TEST) && !defined(SHMEM_TEST) - -/* Anonymous memory */ -#define EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ - (1 << _UFFDIO_COPY) | \ - (1 << _UFFDIO_ZEROPAGE)) - -static int release_pages(char *rel_area) +static int anon_release_pages(char *rel_area) { int ret = 0; @@ -121,7 +117,7 @@ static int release_pages(char *rel_area) return ret; } -static void allocate_area(void **alloc_area) +static void anon_allocate_area(void **alloc_area) { if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) { fprintf(stderr, "out of memory\n"); @@ -129,14 +125,9 @@ static void allocate_area(void **alloc_area) } } -#else /* HUGETLB_TEST or SHMEM_TEST */ - -#define EXPECTED_IOCTLS UFFD_API_RANGE_IOCTLS_BASIC - -#ifdef HUGETLB_TEST /* HugeTLB memory */ -static int release_pages(char *rel_area) +static int hugetlb_release_pages(char *rel_area) { int ret = 0; @@ -152,7 +143,7 @@ static int release_pages(char *rel_area) } -static void allocate_area(void **alloc_area) +static void hugetlb_allocate_area(void **alloc_area) { *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_HUGETLB, huge_fd, @@ -167,10 +158,8 @@ static void allocate_area(void **alloc_area) huge_fd_off0 = *alloc_area; } -#elif defined(SHMEM_TEST) - /* Shared memory */ -static int release_pages(char *rel_area) +static int shmem_release_pages(char *rel_area) { int ret = 0; @@ -182,7 +171,7 @@ static int release_pages(char *rel_area) return ret; } -static void allocate_area(void **alloc_area) +static void shmem_allocate_area(void **alloc_area) { *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); @@ -192,11 +181,35 @@ static void allocate_area(void **alloc_area) } } -#else /* SHMEM_TEST */ -#error "Undefined test type" -#endif /* HUGETLB_TEST */ - -#endif /* !defined(HUGETLB_TEST) && !defined(SHMEM_TEST) */ +struct uffd_test_ops { + unsigned long expected_ioctls; + void (*allocate_area)(void **alloc_area); + int (*release_pages)(char *rel_area); +}; + +#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ + (1 << _UFFDIO_COPY) | \ + (1 << _UFFDIO_ZEROPAGE)) + +static struct uffd_test_ops anon_uffd_test_ops = { + .expected_ioctls = ANON_EXPECTED_IOCTLS, + .allocate_area = anon_allocate_area, + .release_pages = anon_release_pages, +}; + +static struct uffd_test_ops shmem_uffd_test_ops = { + .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC, + .allocate_area = shmem_allocate_area, + .release_pages = shmem_release_pages, +}; + +static struct uffd_test_ops hugetlb_uffd_test_ops = { + .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC, + .allocate_area = hugetlb_allocate_area, + .release_pages = hugetlb_release_pages, +}; + +static struct uffd_test_ops *uffd_test_ops; static int my_bcmp(char *str1, char *str2, size_t n) { @@ -505,7 +518,7 @@ static int stress(unsigned long *userfaults) * UFFDIO_COPY without writing zero pages into area_dst * because the background threads already completed). */ - if (release_pages(area_src)) + if (uffd_test_ops->release_pages(area_src)) return 1; for (cpu = 0; cpu < nr_cpus; cpu++) { @@ -577,12 +590,12 @@ static int faulting_process(void) { unsigned long nr; unsigned long long count; + unsigned long split_nr_pages; -#ifndef HUGETLB_TEST - unsigned long split_nr_pages = (nr_pages + 1) / 2; -#else - unsigned long split_nr_pages = nr_pages; -#endif + if (test_type != TEST_HUGETLB) + split_nr_pages = (nr_pages + 1) / 2; + else + split_nr_pages = nr_pages; for (nr = 0; nr < split_nr_pages; nr++) { count = *area_count(area_dst, nr); @@ -594,7 +607,9 @@ static int faulting_process(void) } } -#ifndef HUGETLB_TEST + if (test_type == TEST_HUGETLB) + return 0; + area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, area_src); if (area_dst == MAP_FAILED) @@ -610,7 +625,7 @@ static int faulting_process(void) } } - if (release_pages(area_dst)) + if (uffd_test_ops->release_pages(area_dst)) return 1; for (nr = 0; nr < nr_pages; nr++) { @@ -618,8 +633,6 @@ static int faulting_process(void) fprintf(stderr, "nr %lu is not zero\n", nr), exit(1); } -#endif /* HUGETLB_TEST */ - return 0; } @@ -627,7 +640,9 @@ static int uffdio_zeropage(int ufd, unsigned long offset) { struct uffdio_zeropage uffdio_zeropage; int ret; - unsigned long has_zeropage = EXPECTED_IOCTLS & (1 << _UFFDIO_ZEROPAGE); + unsigned long has_zeropage; + + has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE); if (offset >= nr_pages * page_size) fprintf(stderr, "unexpected offset %lu\n", @@ -675,7 +690,7 @@ static int userfaultfd_zeropage_test(void) printf("testing UFFDIO_ZEROPAGE: "); fflush(stdout); - if (release_pages(area_dst)) + if (uffd_test_ops->release_pages(area_dst)) return 1; if (userfaultfd_open(0) < 0) @@ -686,7 +701,7 @@ static int userfaultfd_zeropage_test(void) if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) fprintf(stderr, "register failure\n"), exit(1); - expected_ioctls = EXPECTED_IOCTLS; + expected_ioctls = uffd_test_ops->expected_ioctls; if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) fprintf(stderr, @@ -716,7 +731,7 @@ static int userfaultfd_events_test(void) printf("testing events (fork, remap, remove): "); fflush(stdout); - if (release_pages(area_dst)) + if (uffd_test_ops->release_pages(area_dst)) return 1; features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP | @@ -731,7 +746,7 @@ static int userfaultfd_events_test(void) if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) fprintf(stderr, "register failure\n"), exit(1); - expected_ioctls = EXPECTED_IOCTLS; + expected_ioctls = uffd_test_ops->expected_ioctls; if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) fprintf(stderr, @@ -773,10 +788,10 @@ static int userfaultfd_stress(void) int err; unsigned long userfaults[nr_cpus]; - allocate_area((void **)&area_src); + uffd_test_ops->allocate_area((void **)&area_src); if (!area_src) return 1; - allocate_area((void **)&area_dst); + uffd_test_ops->allocate_area((void **)&area_dst); if (!area_dst) return 1; @@ -856,7 +871,7 @@ static int userfaultfd_stress(void) fprintf(stderr, "register failure\n"); return 1; } - expected_ioctls = EXPECTED_IOCTLS; + expected_ioctls = uffd_test_ops->expected_ioctls; if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) { fprintf(stderr, @@ -888,7 +903,7 @@ static int userfaultfd_stress(void) * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's * required to MADV_DONTNEED here. */ - if (release_pages(area_dst)) + if (uffd_test_ops->release_pages(area_dst)) return 1; /* bounce pass */ @@ -934,36 +949,6 @@ static int userfaultfd_stress(void) return userfaultfd_zeropage_test() || userfaultfd_events_test(); } -#ifndef HUGETLB_TEST - -int main(int argc, char **argv) -{ - if (argc < 3) - fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - page_size = sysconf(_SC_PAGE_SIZE); - if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 - > page_size) - fprintf(stderr, "Impossible to run this test\n"), exit(2); - nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size / - nr_cpus; - if (!nr_pages_per_cpu) { - fprintf(stderr, "invalid MiB\n"); - fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); - } - bounces = atoi(argv[2]); - if (bounces <= 0) { - fprintf(stderr, "invalid bounces\n"); - fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); - } - nr_pages = nr_pages_per_cpu * nr_cpus; - printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", - nr_pages, nr_pages_per_cpu); - return userfaultfd_stress(); -} - -#else /* HUGETLB_TEST */ - /* * Copied from mlock2-tests.c */ @@ -988,48 +973,78 @@ unsigned long default_huge_page_size(void) return hps; } -int main(int argc, char **argv) +static void set_test_type(const char *type) { - if (argc < 4) - fprintf(stderr, "Usage: <MiB> <bounces> <hugetlbfs_file>\n"), - exit(1); - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - page_size = default_huge_page_size(); + if (!strcmp(type, "anon")) { + test_type = TEST_ANON; + uffd_test_ops = &anon_uffd_test_ops; + } else if (!strcmp(type, "hugetlb")) { + test_type = TEST_HUGETLB; + uffd_test_ops = &hugetlb_uffd_test_ops; + } else if (!strcmp(type, "shmem")) { + test_type = TEST_SHMEM; + uffd_test_ops = &shmem_uffd_test_ops; + } else { + fprintf(stderr, "Unknown test type: %s\n", type), exit(1); + } + + if (test_type == TEST_HUGETLB) + page_size = default_huge_page_size(); + else + page_size = sysconf(_SC_PAGE_SIZE); + if (!page_size) - fprintf(stderr, "Unable to determine huge page size\n"), + fprintf(stderr, "Unable to determine page size\n"), exit(2); if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 > page_size) fprintf(stderr, "Impossible to run this test\n"), exit(2); - nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size / +} + +int main(int argc, char **argv) +{ + if (argc < 4) + fprintf(stderr, "Usage: <test type> <MiB> <bounces> [hugetlbfs_file]\n"), + exit(1); + + set_test_type(argv[1]); + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size / nr_cpus; if (!nr_pages_per_cpu) { fprintf(stderr, "invalid MiB\n"); fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); } - bounces = atoi(argv[2]); + + bounces = atoi(argv[3]); if (bounces <= 0) { fprintf(stderr, "invalid bounces\n"); fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); } nr_pages = nr_pages_per_cpu * nr_cpus; - huge_fd = open(argv[3], O_CREAT | O_RDWR, 0755); - if (huge_fd < 0) { - fprintf(stderr, "Open of %s failed", argv[3]); - perror("open"); - exit(1); - } - if (ftruncate(huge_fd, 0)) { - fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]); - perror("ftruncate"); - exit(1); + + if (test_type == TEST_HUGETLB) { + if (argc < 5) + fprintf(stderr, "Usage: hugetlb <MiB> <bounces> <hugetlbfs_file>\n"), + exit(1); + huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755); + if (huge_fd < 0) { + fprintf(stderr, "Open of %s failed", argv[3]); + perror("open"); + exit(1); + } + if (ftruncate(huge_fd, 0)) { + fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]); + perror("ftruncate"); + exit(1); + } } printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", nr_pages, nr_pages_per_cpu); return userfaultfd_stress(); } -#endif #else /* __NR_userfaultfd */ #warning "missing __NR_userfaultfd definition" diff --git a/tools/testing/selftests/vm/virtual_address_range.c b/tools/testing/selftests/vm/virtual_address_range.c new file mode 100644 index 000000000000..3b02aa6eb9da --- /dev/null +++ b/tools/testing/selftests/vm/virtual_address_range.c @@ -0,0 +1,122 @@ +/* + * Copyright 2017, Anshuman Khandual, IBM Corp. + * Licensed under GPLv2. + * + * Works on architectures which support 128TB virtual + * address range and beyond. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <numaif.h> +#include <sys/mman.h> +#include <sys/time.h> + +/* + * Maximum address range mapped with a single mmap() + * call is little bit more than 16GB. Hence 16GB is + * chosen as the single chunk size for address space + * mapping. + */ +#define MAP_CHUNK_SIZE 17179869184UL /* 16GB */ + +/* + * Address space till 128TB is mapped without any hint + * and is enabled by default. Address space beyond 128TB + * till 512TB is obtained by passing hint address as the + * first argument into mmap() system call. + * + * The process heap address space is divided into two + * different areas one below 128TB and one above 128TB + * till it reaches 512TB. One with size 128TB and the + * other being 384TB. + */ +#define NR_CHUNKS_128TB 8192UL /* Number of 16GB chunks for 128TB */ +#define NR_CHUNKS_384TB 24576UL /* Number of 16GB chunks for 384TB */ + +#define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */ + +static char *hind_addr(void) +{ + int bits = 48 + rand() % 15; + + return (char *) (1UL << bits); +} + +static int validate_addr(char *ptr, int high_addr) +{ + unsigned long addr = (unsigned long) ptr; + + if (high_addr) { + if (addr < ADDR_MARK_128TB) { + printf("Bad address %lx\n", addr); + return 1; + } + return 0; + } + + if (addr > ADDR_MARK_128TB) { + printf("Bad address %lx\n", addr); + return 1; + } + return 0; +} + +static int validate_lower_address_hint(void) +{ + char *ptr; + + ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ | + PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (ptr == MAP_FAILED) + return 0; + + return 1; +} + +int main(int argc, char *argv[]) +{ + char *ptr[NR_CHUNKS_128TB]; + char *hptr[NR_CHUNKS_384TB]; + char *hint; + unsigned long i, lchunks, hchunks; + + for (i = 0; i < NR_CHUNKS_128TB; i++) { + ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (ptr[i] == MAP_FAILED) { + if (validate_lower_address_hint()) + return 1; + break; + } + + if (validate_addr(ptr[i], 0)) + return 1; + } + lchunks = i; + + for (i = 0; i < NR_CHUNKS_384TB; i++) { + hint = hind_addr(); + hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (hptr[i] == MAP_FAILED) + break; + + if (validate_addr(hptr[i], 1)) + return 1; + } + hchunks = i; + + for (i = 0; i < lchunks; i++) + munmap(ptr[i], MAP_CHUNK_SIZE); + + for (i = 0; i < hchunks; i++) + munmap(hptr[i], MAP_CHUNK_SIZE); + + return 0; +} diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 5b2b4b3c634c..b4967d875236 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -245,7 +245,7 @@ void do_unexpected_base(void) long ret; asm volatile ("int $0x80" : "=a" (ret) : "a" (243), "b" (low_desc) - : "flags"); + : "r8", "r9", "r10", "r11"); memcpy(&desc, low_desc, sizeof(desc)); munmap(low_desc, sizeof(desc)); diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 4af47079cf04..b9a22f18566a 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -45,6 +45,12 @@ #define AR_DB (1 << 22) #define AR_G (1 << 23) +#ifdef __x86_64__ +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" +#else +# define INT80_CLOBBERS +#endif + static int nerrs; /* Points to an array of 1024 ints, each holding its own index. */ @@ -403,6 +409,51 @@ static void *threadproc(void *ctx) } } +#ifdef __i386__ + +#ifndef SA_RESTORE +#define SA_RESTORER 0x04000000 +#endif + +/* + * The UAPI header calls this 'struct sigaction', which conflicts with + * glibc. Sigh. + */ +struct fake_ksigaction { + void *handler; /* the real type is nasty */ + unsigned long sa_flags; + void (*sa_restorer)(void); + unsigned char sigset[8]; +}; + +static void fix_sa_restorer(int sig) +{ + struct fake_ksigaction ksa; + + if (syscall(SYS_rt_sigaction, sig, NULL, &ksa, 8) == 0) { + /* + * glibc has a nasty bug: it sometimes writes garbage to + * sa_restorer. This interacts quite badly with anything + * that fiddles with SS because it can trigger legacy + * stack switching. Patch it up. See: + * + * https://sourceware.org/bugzilla/show_bug.cgi?id=21269 + */ + if (!(ksa.sa_flags & SA_RESTORER) && ksa.sa_restorer) { + ksa.sa_restorer = NULL; + if (syscall(SYS_rt_sigaction, sig, &ksa, NULL, + sizeof(ksa.sigset)) != 0) + err(1, "rt_sigaction"); + } + } +} +#else +static void fix_sa_restorer(int sig) +{ + /* 64-bit glibc works fine. */ +} +#endif + static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) { @@ -414,6 +465,7 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), if (sigaction(sig, &sa, 0)) err(1, "sigaction"); + fix_sa_restorer(sig); } static jmp_buf jmpbuf; @@ -588,7 +640,7 @@ static int invoke_set_thread_area(void) asm volatile ("int $0x80" : "=a" (ret), "+m" (low_user_desc) : "a" (243), "b" (low_user_desc) - : "flags"); + : INT80_CLOBBERS); return ret; } @@ -657,7 +709,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -688,7 +740,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -721,7 +773,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base); @@ -774,7 +826,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base); diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 616ee9673339..a8df159a8924 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -404,8 +404,6 @@ void handler(int signum, siginfo_t *si, void *vucontext) dprintf2("info->si_lower: %p\n", __si_bounds_lower(si)); dprintf2("info->si_upper: %p\n", __si_bounds_upper(si)); - check_siginfo_vs_shadow(si); - for (i = 0; i < 8; i++) dprintf3("[%d]: %p\n", i, si_addr_ptr[i]); switch (br_reason) { @@ -416,6 +414,9 @@ void handler(int signum, siginfo_t *si, void *vucontext) exit(5); case 1: /* #BR MPX bounds exception */ /* these are normal and we expect to see them */ + + check_siginfo_vs_shadow(si); + dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n", status, (void *)ip, si->si_addr); num_bnd_chk++; diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index b037ce9cf116..eaea92439708 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args) asm volatile ("int $0x80" : "+a" (args->nr), "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), - "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)); + "+S" (args->arg3), "+D" (args->arg4), "+r" (bp) + : : "r8", "r9", "r10", "r11"); args->arg5 = bp; #else sys32_helper(args, int80_and_ret); diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 50c26358e8b7..a48da95c18fd 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps; #ifdef __x86_64__ # define REG_IP REG_RIP # define WIDTH "q" +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" #else # define REG_IP REG_EIP # define WIDTH "l" +# define INT80_CLOBBERS #endif static unsigned long get_eflags(void) @@ -140,7 +142,8 @@ int main() printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); - asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)); + asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) + : INT80_CLOBBERS); check_result(); /* |