diff options
Diffstat (limited to 'tools/include')
146 files changed, 10599 insertions, 7431 deletions
diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h index e974ec932ec1..35f33780ca6c 100644 --- a/tools/include/asm-generic/bitops/__fls.h +++ b/tools/include/asm-generic/bitops/__fls.h @@ -10,7 +10,7 @@ * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned int generic___fls(unsigned long word) +static __always_inline __attribute_const__ unsigned int generic___fls(unsigned long word) { unsigned int num = BITS_PER_LONG - 1; diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h index 26f3ce1dd6e4..8eed3437edb9 100644 --- a/tools/include/asm-generic/bitops/fls.h +++ b/tools/include/asm-generic/bitops/fls.h @@ -10,7 +10,7 @@ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static __always_inline int generic_fls(unsigned int x) +static __always_inline __attribute_const__ int generic_fls(unsigned int x) { int r = 32; diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h index 866f2b2304ff..b5f58dd261a3 100644 --- a/tools/include/asm-generic/bitops/fls64.h +++ b/tools/include/asm-generic/bitops/fls64.h @@ -16,7 +16,7 @@ * at position 64. */ #if BITS_PER_LONG == 32 -static __always_inline int fls64(__u64 x) +static __always_inline __attribute_const__ int fls64(__u64 x) { __u32 h = x >> 32; if (h) @@ -24,7 +24,7 @@ static __always_inline int fls64(__u64 x) return fls(x); } #elif BITS_PER_LONG == 64 -static __always_inline int fls64(__u64 x) +static __always_inline __attribute_const__ int fls64(__u64 x) { if (x == 0) return 0; diff --git a/tools/include/asm-generic/io.h b/tools/include/asm-generic/io.h new file mode 100644 index 000000000000..e5a0b07ad452 --- /dev/null +++ b/tools/include/asm-generic/io.h @@ -0,0 +1,482 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_GENERIC_IO_H +#define _TOOLS_ASM_GENERIC_IO_H + +#include <asm/barrier.h> +#include <asm/byteorder.h> + +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#ifndef mmiowb_set_pending +#define mmiowb_set_pending() do { } while (0) +#endif + +#ifndef __io_br +#define __io_br() barrier() +#endif + +/* prevent prefetching of coherent DMA data ahead of a dma-complete */ +#ifndef __io_ar +#ifdef rmb +#define __io_ar(v) rmb() +#else +#define __io_ar(v) barrier() +#endif +#endif + +/* flush writes to coherent DMA data before possibly triggering a DMA read */ +#ifndef __io_bw +#ifdef wmb +#define __io_bw() wmb() +#else +#define __io_bw() barrier() +#endif +#endif + +/* serialize device access against a spin_unlock, usually handled there. */ +#ifndef __io_aw +#define __io_aw() mmiowb_set_pending() +#endif + +#ifndef __io_pbw +#define __io_pbw() __io_bw() +#endif + +#ifndef __io_paw +#define __io_paw() __io_aw() +#endif + +#ifndef __io_pbr +#define __io_pbr() __io_br() +#endif + +#ifndef __io_par +#define __io_par(v) __io_ar(v) +#endif + +#ifndef _THIS_IP_ +#define _THIS_IP_ 0 +#endif + +static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_read_mmio(u8 width, const volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} + +/* + * __raw_{read,write}{b,w,l,q}() access memory in native endianness. + * + * On some architectures memory mapped IO needs to be accessed differently. + * On the simple architectures, we just read/write the memory location + * directly. + */ + +#ifndef __raw_readb +#define __raw_readb __raw_readb +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return *(const volatile u8 __force *)addr; +} +#endif + +#ifndef __raw_readw +#define __raw_readw __raw_readw +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + return *(const volatile u16 __force *)addr; +} +#endif + +#ifndef __raw_readl +#define __raw_readl __raw_readl +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + return *(const volatile u32 __force *)addr; +} +#endif + +#ifndef __raw_readq +#define __raw_readq __raw_readq +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + return *(const volatile u64 __force *)addr; +} +#endif + +#ifndef __raw_writeb +#define __raw_writeb __raw_writeb +static inline void __raw_writeb(u8 value, volatile void __iomem *addr) +{ + *(volatile u8 __force *)addr = value; +} +#endif + +#ifndef __raw_writew +#define __raw_writew __raw_writew +static inline void __raw_writew(u16 value, volatile void __iomem *addr) +{ + *(volatile u16 __force *)addr = value; +} +#endif + +#ifndef __raw_writel +#define __raw_writel __raw_writel +static inline void __raw_writel(u32 value, volatile void __iomem *addr) +{ + *(volatile u32 __force *)addr = value; +} +#endif + +#ifndef __raw_writeq +#define __raw_writeq __raw_writeq +static inline void __raw_writeq(u64 value, volatile void __iomem *addr) +{ + *(volatile u64 __force *)addr = value; +} +#endif + +/* + * {read,write}{b,w,l,q}() access little endian memory and return result in + * native endianness. + */ + +#ifndef readb +#define readb readb +static inline u8 readb(const volatile void __iomem *addr) +{ + u8 val; + + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __raw_readb(addr); + __io_ar(val); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readw +#define readw readw +static inline u16 readw(const volatile void __iomem *addr) +{ + u16 val; + + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); + __io_ar(val); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readl +#define readl readl +static inline u32 readl(const volatile void __iomem *addr) +{ + u32 val; + + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); + __io_ar(val); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readq +#define readq readq +static inline u64 readq(const volatile void __iomem *addr) +{ + u64 val; + + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); + __io_ar(val); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef writeb +#define writeb writeb +static inline void writeb(u8 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeb(value, addr); + __io_aw(); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writew +#define writew writew +static inline void writew(u16 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writew((u16 __force)cpu_to_le16(value), addr); + __io_aw(); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writel +#define writel writel +static inline void writel(u32 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); + __io_aw(); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writeq +#define writeq writeq +static inline void writeq(u64 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); + __io_aw(); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); +} +#endif + +/* + * {read,write}{b,w,l,q}_relaxed() are like the regular version, but + * are not guaranteed to provide ordering against spinlocks or memory + * accesses. + */ +#ifndef readb_relaxed +#define readb_relaxed readb_relaxed +static inline u8 readb_relaxed(const volatile void __iomem *addr) +{ + u8 val; + + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + val = __raw_readb(addr); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readw_relaxed +#define readw_relaxed readw_relaxed +static inline u16 readw_relaxed(const volatile void __iomem *addr) +{ + u16 val; + + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readl_relaxed +#define readl_relaxed readl_relaxed +static inline u32 readl_relaxed(const volatile void __iomem *addr) +{ + u32 val; + + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#if defined(readq) && !defined(readq_relaxed) +#define readq_relaxed readq_relaxed +static inline u64 readq_relaxed(const volatile void __iomem *addr) +{ + u64 val; + + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef writeb_relaxed +#define writeb_relaxed writeb_relaxed +static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __raw_writeb(value, addr); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writew_relaxed +#define writew_relaxed writew_relaxed +static inline void writew_relaxed(u16 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __raw_writew((u16 __force)cpu_to_le16(value), addr); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writel_relaxed +#define writel_relaxed writel_relaxed +static inline void writel_relaxed(u32 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#if defined(writeq) && !defined(writeq_relaxed) +#define writeq_relaxed writeq_relaxed +static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); +} +#endif + +/* + * {read,write}s{b,w,l,q}() repeatedly access the same memory address in + * native endianness in 8-, 16-, 32- or 64-bit chunks (@count times). + */ +#ifndef readsb +#define readsb readsb +static inline void readsb(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u8 *buf = buffer; + + do { + u8 x = __raw_readb(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsw +#define readsw readsw +static inline void readsw(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u16 *buf = buffer; + + do { + u16 x = __raw_readw(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsl +#define readsl readsl +static inline void readsl(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u32 *buf = buffer; + + do { + u32 x = __raw_readl(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsq +#define readsq readsq +static inline void readsq(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u64 *buf = buffer; + + do { + u64 x = __raw_readq(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef writesb +#define writesb writesb +static inline void writesb(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u8 *buf = buffer; + + do { + __raw_writeb(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesw +#define writesw writesw +static inline void writesw(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u16 *buf = buffer; + + do { + __raw_writew(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesl +#define writesl writesl +static inline void writesl(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u32 *buf = buffer; + + do { + __raw_writel(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesq +#define writesq writesq +static inline void writesq(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u64 *buf = buffer; + + do { + __raw_writeq(*buf++, addr); + } while (--count); + } +} +#endif + +#endif /* _TOOLS_ASM_GENERIC_IO_H */ diff --git a/tools/include/asm/io.h b/tools/include/asm/io.h new file mode 100644 index 000000000000..eed5066f25c4 --- /dev/null +++ b/tools/include/asm/io.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_IO_H +#define _TOOLS_ASM_IO_H + +#if defined(__i386__) || defined(__x86_64__) +#include "../../arch/x86/include/asm/io.h" +#else +#include <asm-generic/io.h> +#endif + +#endif /* _TOOLS_ASM_IO_H */ diff --git a/tools/include/asm/timex.h b/tools/include/asm/timex.h new file mode 100644 index 000000000000..5adfe3c6d326 --- /dev/null +++ b/tools/include/asm/timex.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOLS_LINUX_ASM_TIMEX_H +#define __TOOLS_LINUX_ASM_TIMEX_H + +#include <time.h> + +#define cycles_t clock_t + +static inline cycles_t get_cycles(void) +{ + return clock(); +} +#endif // __TOOLS_LINUX_ASM_TIMEX_H diff --git a/tools/include/io_uring/mini_liburing.h b/tools/include/io_uring/mini_liburing.h index 9ccb16074eb5..44be4446feda 100644 --- a/tools/include/io_uring/mini_liburing.h +++ b/tools/include/io_uring/mini_liburing.h @@ -6,6 +6,7 @@ #include <stdio.h> #include <string.h> #include <unistd.h> +#include <sys/uio.h> struct io_sq_ring { unsigned int *head; @@ -55,6 +56,7 @@ struct io_uring { struct io_uring_sq sq; struct io_uring_cq cq; int ring_fd; + unsigned flags; }; #if defined(__x86_64) || defined(__i386__) @@ -72,7 +74,14 @@ static inline int io_uring_mmap(int fd, struct io_uring_params *p, void *ptr; int ret; - sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned int); + if (p->flags & IORING_SETUP_NO_SQARRAY) { + sq->ring_sz = p->cq_off.cqes; + sq->ring_sz += p->cq_entries * sizeof(struct io_uring_cqe); + } else { + sq->ring_sz = p->sq_off.array; + sq->ring_sz += p->sq_entries * sizeof(unsigned int); + } + ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); if (ptr == MAP_FAILED) @@ -83,7 +92,8 @@ static inline int io_uring_mmap(int fd, struct io_uring_params *p, sq->kring_entries = ptr + p->sq_off.ring_entries; sq->kflags = ptr + p->sq_off.flags; sq->kdropped = ptr + p->sq_off.dropped; - sq->array = ptr + p->sq_off.array; + if (!(p->flags & IORING_SETUP_NO_SQARRAY)) + sq->array = ptr + p->sq_off.array; size = p->sq_entries * sizeof(struct io_uring_sqe); sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, @@ -126,28 +136,39 @@ static inline int io_uring_enter(int fd, unsigned int to_submit, flags, sig, _NSIG / 8); } -static inline int io_uring_queue_init(unsigned int entries, - struct io_uring *ring, - unsigned int flags) +static inline int io_uring_queue_init_params(unsigned int entries, + struct io_uring *ring, + struct io_uring_params *p) { - struct io_uring_params p; int fd, ret; memset(ring, 0, sizeof(*ring)); - memset(&p, 0, sizeof(p)); - p.flags = flags; - fd = io_uring_setup(entries, &p); + fd = io_uring_setup(entries, p); if (fd < 0) return fd; - ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq); - if (!ret) + ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq); + if (!ret) { ring->ring_fd = fd; - else + ring->flags = p->flags; + } else { close(fd); + } return ret; } +static inline int io_uring_queue_init(unsigned int entries, + struct io_uring *ring, + unsigned int flags) +{ + struct io_uring_params p; + + memset(&p, 0, sizeof(p)); + p.flags = flags; + + return io_uring_queue_init_params(entries, ring, &p); +} + /* Get a sqe */ static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) { @@ -199,10 +220,18 @@ static inline int io_uring_submit(struct io_uring *ring) ktail = *sq->ktail; to_submit = sq->sqe_tail - sq->sqe_head; - for (submitted = 0; submitted < to_submit; submitted++) { - read_barrier(); - sq->array[ktail++ & mask] = sq->sqe_head++ & mask; + + if (!(ring->flags & IORING_SETUP_NO_SQARRAY)) { + for (submitted = 0; submitted < to_submit; submitted++) { + read_barrier(); + sq->array[ktail++ & mask] = sq->sqe_head++ & mask; + } + } else { + ktail += to_submit; + sq->sqe_head += to_submit; + submitted = to_submit; } + if (!submitted) return 0; diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h new file mode 100644 index 000000000000..14b268f2389a --- /dev/null +++ b/tools/include/linux/args.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_ARGS_H +#define _LINUX_ARGS_H + +/* + * How do these macros work? + * + * In __COUNT_ARGS() _0 to _12 are just placeholders from the start + * in order to make sure _n is positioned over the correct number + * from 12 to 0 (depending on X, which is a variadic argument list). + * They serve no purpose other than occupying a position. Since each + * macro parameter must have a distinct identifier, those identifiers + * are as good as any. + * + * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns + * that as _n. + */ + +/* This counts to 15. Any more, it will return 16th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* Concatenate two parameters, but allow them to be expanded beforehand. */ +#ifndef __CONCAT +#define __CONCAT(a, b) a ## b +#endif +#ifndef CONCATENATE +#define CONCATENATE(a, b) __CONCAT(a, b) +#endif + +#endif /* _LINUX_ARGS_H */ diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h index 01907b33537e..50c66ba9ada5 100644 --- a/tools/include/linux/atomic.h +++ b/tools/include/linux/atomic.h @@ -12,4 +12,26 @@ void atomic_long_set(atomic_long_t *v, long i); #define atomic_cmpxchg_release atomic_cmpxchg #endif /* atomic_cmpxchg_relaxed */ +static inline bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new) +{ + int ret, old = *oldp; + + ret = atomic_cmpxchg(ptr, old, new); + if (ret != old) + *oldp = ret; + return ret == old; +} + +static inline bool atomic_inc_unless_negative(atomic_t *v) +{ + int c = atomic_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c + 1)); + + return true; +} + #endif /* __TOOLS_LINUX_ATOMIC_H */ diff --git a/tools/include/linux/bitfield.h b/tools/include/linux/bitfield.h index 6093fa6db260..ddf81f24956b 100644 --- a/tools/include/linux/bitfield.h +++ b/tools/include/linux/bitfield.h @@ -8,6 +8,7 @@ #define _LINUX_BITFIELD_H #include <linux/build_bug.h> +#include <linux/kernel.h> #include <asm/byteorder.h> /* diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 2a7f260ef9dc..5cb4f3942fd3 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -3,6 +3,7 @@ #define _TOOLS_LINUX_BITMAP_H #include <string.h> +#include <asm-generic/bitsperlong.h> #include <linux/align.h> #include <linux/bitops.h> #include <linux/find.h> @@ -19,9 +20,16 @@ bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); bool __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); +void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); bool __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); +bool __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) @@ -47,6 +55,17 @@ static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits); } +static __always_inline +void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) +{ + unsigned int len = bitmap_size(nbits); + + if (small_const_nbits(nbits)) + *dst = *src; + else + memcpy(dst, src, len); +} + static inline bool bitmap_empty(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) @@ -79,6 +98,20 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, __bitmap_or(dst, src1, src2, nbits); } +static __always_inline +bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; + return __bitmap_andnot(dst, src1, src2, nbits); +} + +static inline unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags __maybe_unused) +{ + return malloc(bitmap_size(nbits)); +} + /** * bitmap_zalloc - Allocate bitmap * @nbits: Number of bits @@ -150,6 +183,30 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } +static __always_inline +bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_subset(src1, src2, nbits); +} + +static inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) +{ + if (__builtin_constant_p(nbits) && nbits == 1) + __set_bit(start, map); + else if (small_const_nbits(start + nbits)) + *map |= GENMASK(start + nbits - 1, start); + else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && + IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && + __builtin_constant_p(nbits & BITMAP_MEM_MASK) && + IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) + memset((char *)map + start / 8, 0xff, nbits / 8); + else + __bitmap_set(map, start, nbits); +} + static inline void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits) { @@ -165,4 +222,15 @@ static inline void bitmap_clear(unsigned long *map, unsigned int start, else __bitmap_clear(map, start, nbits); } + +static __always_inline +void bitmap_xor(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + *dst = *src1 ^ *src2; + else + __bitmap_xor(dst, src1, src2, nbits); +} + #endif /* _TOOLS_LINUX_BITMAP_H */ diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 60044b608817..a40cc861b3a7 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -2,16 +2,15 @@ #ifndef __LINUX_BITS_H #define __LINUX_BITS_H -#include <linux/const.h> #include <vdso/bits.h> #include <uapi/linux/bits.h> -#include <asm/bitsperlong.h> #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) #define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) #define BITS_PER_BYTE 8 +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) /* * Create a contiguous bitmask starting at bit position @l and ending at @@ -19,36 +18,72 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #if !defined(__ASSEMBLY__) + +/* + * Missing asm support + * + * GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(), + * something not available in asm. Nevertheless, fixed width integers is a C + * concept. Assembly code can rely on the long and long long versions instead. + */ + #include <linux/build_bug.h> -#define GENMASK_INPUT_CHECK(h, l) \ - (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __is_constexpr((l) > (h)), (l) > (h), 0))) -#else +#include <linux/compiler.h> +#include <linux/overflow.h> + +#define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) + /* - * BUILD_BUG_ON_ZERO is not available in h files included from asm files, - * disable the input check if that is the case. + * Generate a mask for the specified type @t. Additional checks are made to + * guarantee the value returned fits in that type, relying on + * -Wshift-count-overflow compiler check to detect incompatible arguments. + * For example, all these create build errors or warnings: + * + * - GENMASK(15, 20): wrong argument order + * - GENMASK(72, 15): doesn't fit unsigned long + * - GENMASK_U32(33, 15): doesn't fit in a u32 */ -#define GENMASK_INPUT_CHECK(h, l) 0 -#endif +#define GENMASK_TYPE(t, h, l) \ + ((t)(GENMASK_INPUT_CHECK(h, l) + \ + (type_max(t) << (l) & \ + type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) +#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l) +#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l) + +#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) +#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) +#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) +#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) +#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l) -#if !defined(__ASSEMBLY__) /* - * Missing asm support + * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The + * following examples generate compiler warnings due to -Wshift-count-overflow: * - * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __init128' data - * type instead of direct representation of 128 bit constants - * such as long and unsigned long. The fundamental problem is - * that a 128 bit constant will get silently truncated by the - * gcc compiler. + * - BIT_U8(8) + * - BIT_U32(-1) + * - BIT_U32(40) */ -#define GENMASK_U128(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) -#endif +#define BIT_INPUT_CHECK(type, nr) \ + BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type))) + +#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr))) + +#define BIT_U8(nr) BIT_TYPE(u8, nr) +#define BIT_U16(nr) BIT_TYPE(u16, nr) +#define BIT_U32(nr) BIT_TYPE(u32, nr) +#define BIT_U64(nr) BIT_TYPE(u64, nr) + +#else /* defined(__ASSEMBLY__) */ + +/* + * BUILD_BUG_ON_ZERO is not available in h files included from asm files, + * disable the input check if that is the case. + */ +#define GENMASK(h, l) __GENMASK(h, l) +#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l) + +#endif /* !defined(__ASSEMBLY__) */ #endif /* __LINUX_BITS_H */ diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index b4898ff085de..406923bd4846 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -4,17 +4,17 @@ #include <linux/compiler.h> -#ifdef __CHECKER__ -#define BUILD_BUG_ON_ZERO(e) (0) -#else /* __CHECKER__ */ /* * Force a compilation error if condition is true, but also produce a * result (of value 0 and type int), so the expression can be used * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). + * + * Take an error message as an optional second argument. If omitted, + * default to the stringification of the tested expression. */ -#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) -#endif /* __CHECKER__ */ +#define BUILD_BUG_ON_ZERO(e, ...) \ + __BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true") /* Force a compilation error if a constant expression is not a power of 2 */ #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ @@ -32,7 +32,8 @@ /** * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied * error message. - * @condition: the condition which the compiler should know is false. + * @cond: the condition which the compiler should know is false. + * @msg: build-time error message * * See BUILD_BUG_ON for description. */ @@ -60,6 +61,7 @@ /** * static_assert - check integer constant expression at build time + * @expr: expression to be checked * * static_assert() is a wrapper for the C11 _Static_assert, with a * little macro magic to make the message optional (defaulting to the diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h new file mode 100644 index 000000000000..a86af9bc8bdc --- /dev/null +++ b/tools/include/linux/cfi_types.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include <linux/linkage.h> + +#ifdef CONFIG_CFI +/* + * Use the __kcfi_typeid_<function> type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. + */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI */ + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_CFI +#define DEFINE_CFI_TYPE(name, func) \ + /* \ + * Force a reference to the function so the compiler generates \ + * __kcfi_typeid_<func>. \ + */ \ + __ADDRESSABLE(func); \ + /* u32 name __ro_after_init = __kcfi_typeid_<func> */ \ + extern u32 name; \ + asm ( \ + " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \ + " .type " #name ",\%object \n" \ + " .globl " #name " \n" \ + " .p2align 2, 0x0 \n" \ + #name ": \n" \ + " .4byte __kcfi_typeid_" #func " \n" \ + " .size " #name ", 4 \n" \ + " .popsection \n" \ + ); +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/tools/include/linux/compiler-context-analysis.h b/tools/include/linux/compiler-context-analysis.h new file mode 100644 index 000000000000..13a9115e9e58 --- /dev/null +++ b/tools/include/linux/compiler-context-analysis.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_COMPILER_CONTEXT_ANALYSIS_H +#define _TOOLS_LINUX_COMPILER_CONTEXT_ANALYSIS_H + +/* + * Macros and attributes for compiler-based static context analysis. + * No-op stubs for tools. + */ + +#define __guarded_by(...) +#define __pt_guarded_by(...) + +#define context_lock_struct(name, ...) struct __VA_ARGS__ name + +#define __no_context_analysis +#define __context_unsafe(comment) +#define context_unsafe(...) ({ __VA_ARGS__; }) +#define context_unsafe_alias(p) +#define disable_context_analysis() +#define enable_context_analysis() + +#define __must_hold(...) +#define __must_not_hold(...) +#define __acquires(...) +#define __cond_acquires(ret, x) +#define __releases(...) +#define __acquire(x) (void)0 +#define __release(x) (void)0 + +#define __must_hold_shared(...) +#define __acquires_shared(...) +#define __cond_acquires_shared(ret, x) +#define __releases_shared(...) +#define __acquire_shared(x) (void)0 +#define __release_shared(x) (void)0 + +#define __acquire_ret(call, expr) (call) +#define __acquire_shared_ret(call, expr) (call) +#define __acquires_ret +#define __acquires_shared_ret + +#endif /* _TOOLS_LINUX_COMPILER_CONTEXT_ANALYSIS_H */ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 9c05a59f0184..f40bd2b04c29 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -81,6 +81,28 @@ #define __is_constexpr(x) \ (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) +/* + * Similar to statically_true() but produces a constant expression + * + * To be used in conjunction with macros, such as BUILD_BUG_ON_ZERO(), + * which require their input to be a constant expression and for which + * statically_true() would otherwise fail. + * + * This is a trade-off: const_true() requires all its operands to be + * compile time constants. Else, it would always returns false even on + * the most trivial cases like: + * + * true || non_const_var + * + * On the opposite, statically_true() is able to fold more complex + * tautologies and will return true on expressions such as: + * + * !(non_const_var * 8 % 4) + * + * For the general case, statically_true() is better. + */ +#define const_true(x) __builtin_choose_expr(__is_constexpr(x), x, false) + #ifdef __ANDROID__ /* * FIXME: Big hammer to get rid of tons of: @@ -116,6 +138,10 @@ # define __force #endif +#ifndef __iomem +# define __iomem +#endif + #ifndef __weak # define __weak __attribute__((weak)) #endif @@ -222,6 +248,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __asm__ ("" : "=r" (var) : "0" (var)) #endif +#ifndef __BUILD_BUG_ON_ZERO_MSG +#if defined(__clang__) +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)(sizeof(struct { int:(-!!(e)); }))) +#else +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);})) +#endif +#endif + #endif /* __ASSEMBLY__ */ #endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index d09f9dc172a4..b3adbf5682f0 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -13,23 +13,7 @@ #define __has_builtin(x) (0) #endif -#ifdef __CHECKER__ -/* context/locking */ -# define __must_hold(x) __attribute__((context(x,1,1))) -# define __acquires(x) __attribute__((context(x,0,1))) -# define __releases(x) __attribute__((context(x,1,0))) -# define __acquire(x) __context__(x,1) -# define __release(x) __context__(x,-1) -# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) -#else /* __CHECKER__ */ -/* context/locking */ -# define __must_hold(x) -# define __acquires(x) -# define __releases(x) -# define __acquire(x) (void)0 -# define __release(x) (void)0 -# define __cond_lock(x,c) (c) -#endif /* __CHECKER__ */ +#include <linux/compiler-context-analysis.h> /* Compiler specific macros. */ #ifdef __GNUC__ @@ -40,4 +24,26 @@ #define asm_goto_output(x...) asm goto(x) #endif +/* + * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving + * non-scalar types unchanged. + */ +/* + * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' + * is not type-compatible with 'signed char', and we define a separate case. + */ +#define __scalar_type_to_expr_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (signed type)0 + +#define __unqual_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (char)0, \ + __scalar_type_to_expr_cases(char), \ + __scalar_type_to_expr_cases(short), \ + __scalar_type_to_expr_cases(int), \ + __scalar_type_to_expr_cases(long), \ + __scalar_type_to_expr_cases(long long), \ + default: (x))) + #endif /* __LINUX_COMPILER_TYPES_H */ diff --git a/tools/include/linux/container_of.h b/tools/include/linux/container_of.h new file mode 100644 index 000000000000..c879e14c3dd6 --- /dev/null +++ b/tools/include/linux/container_of.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_CONTAINER_OF_H +#define _TOOLS_LINUX_CONTAINER_OF_H + +#ifndef container_of +/** + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + const typeof(((type *)0)->member) * __mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); }) +#endif + +#endif /* _TOOLS_LINUX_CONTAINER_OF_H */ diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index 89b0ac0014b0..2e179abe472a 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -22,30 +22,6 @@ #define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2)) /* - * Below are the definition of bit offsets for perf option, and works as - * arbitrary values for all ETM versions. - * - * Most of them are orignally from ETMv3.5/PTM's ETMCR config, therefore, - * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and - * directly use below macros as config bits. - */ -#define ETM_OPT_BRANCH_BROADCAST 8 -#define ETM_OPT_CYCACC 12 -#define ETM_OPT_CTXTID 14 -#define ETM_OPT_CTXTID2 15 -#define ETM_OPT_TS 28 -#define ETM_OPT_RETSTK 29 - -/* ETMv4 CONFIGR programming bits for the ETM OPTs */ -#define ETM4_CFG_BIT_BB 3 -#define ETM4_CFG_BIT_CYCACC 4 -#define ETM4_CFG_BIT_CTXTID 6 -#define ETM4_CFG_BIT_VMID 7 -#define ETM4_CFG_BIT_TS 11 -#define ETM4_CFG_BIT_RETSTK 12 -#define ETM4_CFG_BIT_VMID_OPT 15 - -/* * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. * Used to associate a CPU with the CoreSight Trace ID. * [07:00] - Trace ID - uses 8 bits to make value easy to read in file. diff --git a/tools/include/linux/gfp.h b/tools/include/linux/gfp.h index 6a10ff5f5be9..9e957b57b694 100644 --- a/tools/include/linux/gfp.h +++ b/tools/include/linux/gfp.h @@ -5,6 +5,10 @@ #include <linux/types.h> #include <linux/gfp_types.h> +/* Helper macro to avoid gfp flags if they are the default one */ +#define __default_gfp(a,...) a +#define default_gfp(...) __default_gfp(__VA_ARGS__ __VA_OPT__(,) GFP_KERNEL) + static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { return !!(gfp_flags & __GFP_DIRECT_RECLAIM); diff --git a/tools/include/linux/gfp_types.h b/tools/include/linux/gfp_types.h index 5f9f1ed190a0..6c75df30a281 100644 --- a/tools/include/linux/gfp_types.h +++ b/tools/include/linux/gfp_types.h @@ -1 +1,391 @@ -#include "../../../include/linux/gfp_types.h" +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GFP_TYPES_H +#define __LINUX_GFP_TYPES_H + +#include <linux/bits.h> + +/* The typedef is in types.h but we want the documentation here */ +#if 0 +/** + * typedef gfp_t - Memory allocation flags. + * + * GFP flags are commonly used throughout Linux to indicate how memory + * should be allocated. The GFP acronym stands for get_free_pages(), + * the underlying memory allocation function. Not every GFP flag is + * supported by every function which may allocate memory. Most users + * will want to use a plain ``GFP_KERNEL``. + */ +typedef unsigned int __bitwise gfp_t; +#endif + +/* + * In case of changes, please don't forget to update + * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c + */ + +enum { + ___GFP_DMA_BIT, + ___GFP_HIGHMEM_BIT, + ___GFP_DMA32_BIT, + ___GFP_MOVABLE_BIT, + ___GFP_RECLAIMABLE_BIT, + ___GFP_HIGH_BIT, + ___GFP_IO_BIT, + ___GFP_FS_BIT, + ___GFP_ZERO_BIT, + ___GFP_UNUSED_BIT, /* 0x200u unused */ + ___GFP_DIRECT_RECLAIM_BIT, + ___GFP_KSWAPD_RECLAIM_BIT, + ___GFP_WRITE_BIT, + ___GFP_NOWARN_BIT, + ___GFP_RETRY_MAYFAIL_BIT, + ___GFP_NOFAIL_BIT, + ___GFP_NORETRY_BIT, + ___GFP_MEMALLOC_BIT, + ___GFP_COMP_BIT, + ___GFP_NOMEMALLOC_BIT, + ___GFP_HARDWALL_BIT, + ___GFP_THISNODE_BIT, + ___GFP_ACCOUNT_BIT, + ___GFP_ZEROTAGS_BIT, +#ifdef CONFIG_KASAN_HW_TAGS + ___GFP_SKIP_ZERO_BIT, + ___GFP_SKIP_KASAN_BIT, +#endif +#ifdef CONFIG_LOCKDEP + ___GFP_NOLOCKDEP_BIT, +#endif + ___GFP_NO_OBJ_EXT_BIT, + ___GFP_LAST_BIT +}; + +/* Plain integer GFP bitmasks. Do not use this directly. */ +#define ___GFP_DMA BIT(___GFP_DMA_BIT) +#define ___GFP_HIGHMEM BIT(___GFP_HIGHMEM_BIT) +#define ___GFP_DMA32 BIT(___GFP_DMA32_BIT) +#define ___GFP_MOVABLE BIT(___GFP_MOVABLE_BIT) +#define ___GFP_RECLAIMABLE BIT(___GFP_RECLAIMABLE_BIT) +#define ___GFP_HIGH BIT(___GFP_HIGH_BIT) +#define ___GFP_IO BIT(___GFP_IO_BIT) +#define ___GFP_FS BIT(___GFP_FS_BIT) +#define ___GFP_ZERO BIT(___GFP_ZERO_BIT) +/* 0x200u unused */ +#define ___GFP_DIRECT_RECLAIM BIT(___GFP_DIRECT_RECLAIM_BIT) +#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT) +#define ___GFP_WRITE BIT(___GFP_WRITE_BIT) +#define ___GFP_NOWARN BIT(___GFP_NOWARN_BIT) +#define ___GFP_RETRY_MAYFAIL BIT(___GFP_RETRY_MAYFAIL_BIT) +#define ___GFP_NOFAIL BIT(___GFP_NOFAIL_BIT) +#define ___GFP_NORETRY BIT(___GFP_NORETRY_BIT) +#define ___GFP_MEMALLOC BIT(___GFP_MEMALLOC_BIT) +#define ___GFP_COMP BIT(___GFP_COMP_BIT) +#define ___GFP_NOMEMALLOC BIT(___GFP_NOMEMALLOC_BIT) +#define ___GFP_HARDWALL BIT(___GFP_HARDWALL_BIT) +#define ___GFP_THISNODE BIT(___GFP_THISNODE_BIT) +#define ___GFP_ACCOUNT BIT(___GFP_ACCOUNT_BIT) +#define ___GFP_ZEROTAGS BIT(___GFP_ZEROTAGS_BIT) +#ifdef CONFIG_KASAN_HW_TAGS +#define ___GFP_SKIP_ZERO BIT(___GFP_SKIP_ZERO_BIT) +#define ___GFP_SKIP_KASAN BIT(___GFP_SKIP_KASAN_BIT) +#else +#define ___GFP_SKIP_ZERO 0 +#define ___GFP_SKIP_KASAN 0 +#endif +#ifdef CONFIG_LOCKDEP +#define ___GFP_NOLOCKDEP BIT(___GFP_NOLOCKDEP_BIT) +#else +#define ___GFP_NOLOCKDEP 0 +#endif +#define ___GFP_NO_OBJ_EXT BIT(___GFP_NO_OBJ_EXT_BIT) + +/* + * Physical address zone modifiers (see linux/mmzone.h - low four bits) + * + * Do not put any conditional on these. If necessary modify the definitions + * without the underscores and use them consistently. The definitions here may + * be used in bit comparisons. + */ +#define __GFP_DMA ((__force gfp_t)___GFP_DMA) +#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) +#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) +#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ +#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) + +/** + * DOC: Page mobility and placement hints + * + * Page mobility and placement hints + * --------------------------------- + * + * These flags provide hints about how mobile the page is. Pages with similar + * mobility are placed within the same pageblocks to minimise problems due + * to external fragmentation. + * + * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be + * moved by page migration during memory compaction or can be reclaimed. + * + * %__GFP_RECLAIMABLE is used for slab allocations that specify + * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. + * + * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible, + * these pages will be spread between local zones to avoid all the dirty + * pages being in one zone (fair zone allocation policy). + * + * %__GFP_HARDWALL enforces the cpuset memory allocation policy. + * + * %__GFP_THISNODE forces the allocation to be satisfied from the requested + * node with no fallbacks or placement policy enforcements. + * + * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. + * + * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension. + * mark_obj_codetag_empty() should be called upon freeing for objects allocated + * with this flag to indicate that their NULL tags are expected and normal. + */ +#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) +#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) +#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) +#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) +#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) +#define __GFP_NO_OBJ_EXT ((__force gfp_t)___GFP_NO_OBJ_EXT) + +/** + * DOC: Watermark modifiers + * + * Watermark modifiers -- controls access to emergency reserves + * ------------------------------------------------------------ + * + * %__GFP_HIGH indicates that the caller is high-priority and that granting + * the request is necessary before the system can make forward progress. + * For example creating an IO context to clean pages and requests + * from atomic context. + * + * %__GFP_MEMALLOC allows access to all memory. This should only be used when + * the caller guarantees the allocation will allow more memory to be freed + * very shortly e.g. process exiting or swapping. Users either should + * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). + * Users of this flag have to be extremely careful to not deplete the reserve + * completely and implement a throttling mechanism which controls the + * consumption of the reserve based on the amount of freed memory. + * Usage of a pre-allocated pool (e.g. mempool) should be always considered + * before using this flag. + * + * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. + * This takes precedence over the %__GFP_MEMALLOC flag if both are set. + */ +#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) +#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) +#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) + +/** + * DOC: Reclaim modifiers + * + * Reclaim modifiers + * ----------------- + * Please note that all the following flags are only applicable to sleepable + * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them). + * + * %__GFP_IO can start physical IO. + * + * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the + * allocator recursing into the filesystem which might already be holding + * locks. + * + * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. + * This flag can be cleared to avoid unnecessary delays when a fallback + * option is available. + * + * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when + * the low watermark is reached and have it reclaim pages until the high + * watermark is reached. A caller may wish to clear this flag when fallback + * options are available and the reclaim is likely to disrupt the system. The + * canonical example is THP allocation where a fallback is cheap but + * reclaim/compaction may cause indirect stalls. + * + * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. + * + * The default allocator behavior depends on the request size. We have a concept + * of so-called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). + * !costly allocations are too essential to fail so they are implicitly + * non-failing by default (with some exceptions like OOM victims might fail so + * the caller still has to check for failures) while costly requests try to be + * not disruptive and back off even without invoking the OOM killer. + * The following three modifiers might be used to override some of these + * implicit rules. Please note that all of them must be used along with + * %__GFP_DIRECT_RECLAIM flag. + * + * %__GFP_NORETRY: The VM implementation will try only very lightweight + * memory direct reclaim to get some memory under memory pressure (thus + * it can sleep). It will avoid disruptive actions like OOM killer. The + * caller must handle the failure which is quite likely to happen under + * heavy memory pressure. The flag is suitable when failure can easily be + * handled at small cost, such as reduced throughput. + * + * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim + * procedures that have previously failed if there is some indication + * that progress has been made elsewhere. It can wait for other + * tasks to attempt high-level approaches to freeing memory such as + * compaction (which removes fragmentation) and page-out. + * There is still a definite limit to the number of retries, but it is + * a larger limit than with %__GFP_NORETRY. + * Allocations with this flag may fail, but only when there is + * genuinely little unused memory. While these allocations do not + * directly trigger the OOM killer, their failure indicates that + * the system is likely to need to use the OOM killer soon. The + * caller must handle failure, but can reasonably do so by failing + * a higher-level request, or completing it only in a much less + * efficient manner. + * If the allocation does fail, and the caller is in a position to + * free some non-essential memory, doing so could benefit the system + * as a whole. + * + * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller + * cannot handle allocation failures. The allocation could block + * indefinitely but will never return with failure. Testing for + * failure is pointless. + * It _must_ be blockable and used together with __GFP_DIRECT_RECLAIM. + * It should _never_ be used in non-sleepable contexts. + * New users should be evaluated carefully (and the flag should be + * used only when there is no reasonable failure policy) but it is + * definitely preferable to use the flag rather than opencode endless + * loop around allocator. + * Allocating pages from the buddy with __GFP_NOFAIL and order > 1 is + * not supported. Please consider using kvmalloc() instead. + */ +#define __GFP_IO ((__force gfp_t)___GFP_IO) +#define __GFP_FS ((__force gfp_t)___GFP_FS) +#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ +#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) +#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) +#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) + +/** + * DOC: Action modifiers + * + * Action modifiers + * ---------------- + * + * %__GFP_NOWARN suppresses allocation failure reports. + * + * %__GFP_COMP address compound page metadata. + * + * %__GFP_ZERO returns a zeroed page on success. + * + * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself + * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that + * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting + * memory tags at the same time as zeroing memory has minimal additional + * performance impact. + * + * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation. + * Used for userspace and vmalloc pages; the latter are unpoisoned by + * kasan_unpoison_vmalloc instead. For userspace pages, results in + * poisoning being skipped as well, see should_skip_kasan_poison for + * details. Only effective in HW_TAGS mode. + */ +#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) +#define __GFP_COMP ((__force gfp_t)___GFP_COMP) +#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) +#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) +#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO) +#define __GFP_SKIP_KASAN ((__force gfp_t)___GFP_SKIP_KASAN) + +/* Disable lockdep for GFP context tracking */ +#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) + +/* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT ___GFP_LAST_BIT +#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) + +/** + * DOC: Useful GFP flag combinations + * + * Useful GFP flag combinations + * ---------------------------- + * + * Useful GFP flag combinations that are commonly used. It is recommended + * that subsystems start with one of these combinations and then set/clear + * %__GFP_FOO flags as necessary. + * + * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower + * watermark is applied to allow access to "atomic reserves". + * The current implementation doesn't support NMI, nor contexts that disable + * preemption under PREEMPT_RT. This includes raw_spin_lock() and plain + * preempt_disable() - see "Memory allocation" in + * Documentation/core-api/real-time/differences.rst for more info. + * + * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires + * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. + * + * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is + * accounted to kmemcg. + * + * %GFP_NOWAIT is for kernel allocations that should not stall for direct + * reclaim, start physical IO or use any filesystem callback. It is very + * likely to fail to allocate memory, even for very small allocations. + * The same restrictions on calling contexts apply as for %GFP_ATOMIC. + * + * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages + * that do not require the starting of any physical IO. + * Please try to avoid using this flag directly and instead use + * memalloc_noio_{save,restore} to mark the whole scope which cannot + * perform any IO with a short explanation why. All allocation requests + * will inherit GFP_NOIO implicitly. + * + * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. + * Please try to avoid using this flag directly and instead use + * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't + * recurse into the FS layer with a short explanation why. All allocation + * requests will inherit GFP_NOFS implicitly. + * + * %GFP_USER is for userspace allocations that also need to be directly + * accessibly by the kernel or hardware. It is typically used by hardware + * for buffers that are mapped to userspace (e.g. graphics) that hardware + * still must DMA to. cpuset limits are enforced for these allocations. + * + * %GFP_DMA exists for historical reasons and should be avoided where possible. + * The flags indicates that the caller requires that the lowest zone be + * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but + * it would require careful auditing as some users really require it and + * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the + * lowest zone as a type of emergency reserve. + * + * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit + * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory + * because the DMA32 kmalloc cache array is not implemented. + * (Reason: there is no such user in kernel). + * + * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, + * do not need to be directly accessible by the kernel but that cannot + * move once in use. An example may be a hardware allocation that maps + * data directly into userspace but has no addressing limitations. + * + * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not + * need direct access to but can use kmap() when access is required. They + * are expected to be movable via page reclaim or page migration. Typically, + * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE. + * + * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They + * are compound allocations that will generally fail quickly if memory is not + * available and will not wake kswapd/kcompactd on failure. The _LIGHT + * version does not attempt reclaim/compaction at all and is by default used + * in page fault path, while the non-light is used by khugepaged. + */ +#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) +#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM | __GFP_NOWARN) +#define GFP_NOIO (__GFP_RECLAIM) +#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) +#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_DMA __GFP_DMA +#define GFP_DMA32 __GFP_DMA32 +#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) +#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | __GFP_SKIP_KASAN) +#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ + __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) +#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) + +#endif /* __LINUX_GFP_TYPES_H */ diff --git a/tools/include/linux/interval_tree_generic.h b/tools/include/linux/interval_tree_generic.h index aaa8a0767aa3..c5a2fed49eb0 100644 --- a/tools/include/linux/interval_tree_generic.h +++ b/tools/include/linux/interval_tree_generic.h @@ -77,7 +77,7 @@ ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \ * Cond2: start <= ITLAST(node) \ */ \ \ -static ITSTRUCT * \ +ITSTATIC ITSTRUCT * \ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ { \ while (true) { \ @@ -104,12 +104,8 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ if (ITSTART(node) <= last) { /* Cond1 */ \ if (start <= ITLAST(node)) /* Cond2 */ \ return node; /* node is leftmost match */ \ - if (node->ITRB.rb_right) { \ - node = rb_entry(node->ITRB.rb_right, \ - ITSTRUCT, ITRB); \ - if (start <= node->ITSUBTREE) \ - continue; \ - } \ + node = rb_entry(node->ITRB.rb_right, ITSTRUCT, ITRB); \ + continue; \ } \ return NULL; /* No match */ \ } \ diff --git a/tools/include/linux/io.h b/tools/include/linux/io.h index e129871fe661..4b94b84160b8 100644 --- a/tools/include/linux/io.h +++ b/tools/include/linux/io.h @@ -2,4 +2,6 @@ #ifndef _TOOLS_IO_H #define _TOOLS_IO_H -#endif +#include <asm/io.h> + +#endif /* _TOOLS_IO_H */ diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index 5a37ccbec54f..f61a01dd7eb7 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -18,6 +18,7 @@ static inline const char *kallsyms_lookup(unsigned long addr, return NULL; } +#ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #include <stdlib.h> static inline void print_ip_sym(const char *loglvl, unsigned long ip) @@ -30,5 +31,8 @@ static inline void print_ip_sym(const char *loglvl, unsigned long ip) free(name); } +#else +static inline void print_ip_sym(const char *loglvl, unsigned long ip) {} +#endif #endif diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 07cfad817d53..c8c18d3908a9 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -11,6 +11,7 @@ #include <linux/panic.h> #include <endian.h> #include <byteswap.h> +#include <linux/container_of.h> #ifndef UINT_MAX #define UINT_MAX (~0U) @@ -25,19 +26,6 @@ #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif -#ifndef container_of -/** - * container_of - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - */ -#define container_of(ptr, type, member) ({ \ - const typeof(((type *)0)->member) * __mptr = (ptr); \ - (type *)((char *)__mptr - offsetof(type, member)); }) -#endif - #ifndef max #define max(x, y) ({ \ typeof(x) _max1 = (x); \ diff --git a/tools/include/linux/list.h b/tools/include/linux/list.h index a4dfb6a7cc6a..a692ff7aed5c 100644 --- a/tools/include/linux/list.h +++ b/tools/include/linux/list.h @@ -170,6 +170,16 @@ static inline void list_move_tail(struct list_head *list, } /** + * list_is_first -- tests whether @list is the first entry in list @head + * @list: the entry to test + * @head: the head of the list + */ +static inline int list_is_first(const struct list_head *list, const struct list_head *head) +{ + return list->prev == head; +} + +/** * list_is_last - tests whether @list is the last entry in list @head * @list: the entry to test * @head: the head of the list diff --git a/tools/include/linux/livepatch_external.h b/tools/include/linux/livepatch_external.h new file mode 100644 index 000000000000..138af19b0f5c --- /dev/null +++ b/tools/include/linux/livepatch_external.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * External livepatch interfaces for patch creation tooling + */ + +#ifndef _LINUX_LIVEPATCH_EXTERNAL_H_ +#define _LINUX_LIVEPATCH_EXTERNAL_H_ + +#include <linux/types.h> + +#define KLP_RELOC_SEC_PREFIX ".klp.rela." +#define KLP_SYM_PREFIX ".klp.sym." + +#define __KLP_PRE_PATCH_PREFIX __klp_pre_patch_callback_ +#define __KLP_POST_PATCH_PREFIX __klp_post_patch_callback_ +#define __KLP_PRE_UNPATCH_PREFIX __klp_pre_unpatch_callback_ +#define __KLP_POST_UNPATCH_PREFIX __klp_post_unpatch_callback_ + +#define KLP_PRE_PATCH_PREFIX __stringify(__KLP_PRE_PATCH_PREFIX) +#define KLP_POST_PATCH_PREFIX __stringify(__KLP_POST_PATCH_PREFIX) +#define KLP_PRE_UNPATCH_PREFIX __stringify(__KLP_PRE_UNPATCH_PREFIX) +#define KLP_POST_UNPATCH_PREFIX __stringify(__KLP_POST_UNPATCH_PREFIX) + +struct klp_object; + +typedef int (*klp_pre_patch_t)(struct klp_object *obj); +typedef void (*klp_post_patch_t)(struct klp_object *obj); +typedef void (*klp_pre_unpatch_t)(struct klp_object *obj); +typedef void (*klp_post_unpatch_t)(struct klp_object *obj); + +/** + * struct klp_callbacks - pre/post live-(un)patch callback structure + * @pre_patch: executed before code patching + * @post_patch: executed after code patching + * @pre_unpatch: executed before code unpatching + * @post_unpatch: executed after code unpatching + * @post_unpatch_enabled: flag indicating if post-unpatch callback + * should run + * + * All callbacks are optional. Only the pre-patch callback, if provided, + * will be unconditionally executed. If the parent klp_object fails to + * patch for any reason, including a non-zero error status returned from + * the pre-patch callback, no further callbacks will be executed. + */ +struct klp_callbacks { + klp_pre_patch_t pre_patch; + klp_post_patch_t post_patch; + klp_pre_unpatch_t pre_unpatch; + klp_post_unpatch_t post_unpatch; + bool post_unpatch_enabled; +}; + +/* + * 'struct klp_{func,object}_ext' are compact "external" representations of + * 'struct klp_{func,object}'. They are used by objtool for livepatch + * generation. The structs are then read by the livepatch module and converted + * to the real structs before calling klp_enable_patch(). + * + * TODO make these the official API for klp_enable_patch(). That should + * simplify livepatch's interface as well as its data structure lifetime + * management. + */ +struct klp_func_ext { + const char *old_name; + void *new_func; + unsigned long sympos; +}; + +struct klp_object_ext { + const char *name; + struct klp_func_ext *funcs; + struct klp_callbacks callbacks; + unsigned int nr_funcs; +}; + +#endif /* _LINUX_LIVEPATCH_EXTERNAL_H_ */ diff --git a/tools/include/linux/math64.h b/tools/include/linux/math64.h index 4ad45d5943dc..8a67d478bf19 100644 --- a/tools/include/linux/math64.h +++ b/tools/include/linux/math64.h @@ -72,4 +72,9 @@ static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) } #endif +static inline u64 div_u64(u64 dividend, u32 divisor) +{ + return dividend / divisor; +} + #endif /* _LINUX_MATH64_H */ diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index 677c37e4a18c..84b5954f66c3 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -4,6 +4,7 @@ #include <linux/align.h> #include <linux/mmzone.h> +#include <linux/sizes.h> #define PAGE_SHIFT 12 #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) @@ -16,6 +17,7 @@ #define __va(x) ((void *)((unsigned long)(x))) #define __pa(x) ((unsigned long)(x)) +#define __pa_symbol(x) ((unsigned long)(x)) #define pfn_to_page(pfn) ((void *)((pfn) * PAGE_SIZE)) @@ -31,8 +33,6 @@ static inline phys_addr_t virt_to_phys(volatile void *address) return (phys_addr_t)address; } -void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid); - static inline void totalram_pages_inc(void) { } diff --git a/tools/include/linux/moduleparam.h b/tools/include/linux/moduleparam.h new file mode 100644 index 000000000000..4c4d05bef0cb --- /dev/null +++ b/tools/include/linux/moduleparam.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_MODULE_PARAMS_H +#define _TOOLS_LINUX_MODULE_PARAMS_H + +#define MODULE_PARM_DESC(parm, desc) + +#endif // _TOOLS_LINUX_MODULE_PARAMS_H diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index df5d9fa84dba..c6def4049b1a 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -65,5 +65,8 @@ struct unwind_hint { #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 #define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 + +#define ANNOTYPE_DATA_SPECIAL 1 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/overflow.h b/tools/include/linux/overflow.h index dcb0c1bf6866..3427d7880326 100644 --- a/tools/include/linux/overflow.h +++ b/tools/include/linux/overflow.h @@ -69,6 +69,25 @@ }) /** + * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX + * @factor1: first factor + * @factor2: second factor + * + * Returns: calculate @factor1 * @factor2, both promoted to size_t, + * with any overflow causing the return value to be SIZE_MAX. The + * lvalue must be size_t to avoid implicit type conversion. + */ +static inline size_t __must_check size_mul(size_t factor1, size_t factor2) +{ + size_t bytes; + + if (check_mul_overflow(factor1, factor2, &bytes)) + return SIZE_MAX; + + return bytes; +} + +/** * array_size() - Calculate size of 2-dimensional array. * * @a: dimension one diff --git a/tools/include/linux/pci_ids.h b/tools/include/linux/pci_ids.h new file mode 120000 index 000000000000..1c9e88f41261 --- /dev/null +++ b/tools/include/linux/pci_ids.h @@ -0,0 +1 @@ +../../../include/linux/pci_ids.h
\ No newline at end of file diff --git a/tools/include/linux/prandom.h b/tools/include/linux/prandom.h new file mode 100644 index 000000000000..b745041ccd6a --- /dev/null +++ b/tools/include/linux/prandom.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOLS_LINUX_PRANDOM_H +#define __TOOLS_LINUX_PRANDOM_H + +#include <linux/types.h> + +struct rnd_state { + __u32 s1, s2, s3, s4; +}; + +/* + * Handle minimum values for seeds + */ +static inline u32 __seed(u32 x, u32 m) +{ + return (x < m) ? x + m : x; +} + +/** + * prandom_seed_state - set seed for prandom_u32_state(). + * @state: pointer to state structure to receive the seed. + * @seed: arbitrary 64-bit value to use as a seed. + */ +static inline void prandom_seed_state(struct rnd_state *state, u64 seed) +{ + u32 i = ((seed >> 32) ^ (seed << 10) ^ seed) & 0xffffffffUL; + + state->s1 = __seed(i, 2U); + state->s2 = __seed(i, 8U); + state->s3 = __seed(i, 16U); + state->s4 = __seed(i, 128U); +} + +/** + * prandom_u32_state - seeded pseudo-random number generator. + * @state: pointer to state structure holding seeded state. + * + * This is used for pseudo-randomness with no outside seeding. + * For more random results, use get_random_u32(). + */ +static inline u32 prandom_u32_state(struct rnd_state *state) +{ +#define TAUSWORTHE(s, a, b, c, d) (((s & c) << d) ^ (((s << a) ^ s) >> b)) + state->s1 = TAUSWORTHE(state->s1, 6U, 13U, 4294967294U, 18U); + state->s2 = TAUSWORTHE(state->s2, 2U, 27U, 4294967288U, 2U); + state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U, 7U); + state->s4 = TAUSWORTHE(state->s4, 3U, 12U, 4294967168U, 13U); + + return (state->s1 ^ state->s2 ^ state->s3 ^ state->s4); +} +#endif // __TOOLS_LINUX_PRANDOM_H diff --git a/tools/include/linux/refcount.h b/tools/include/linux/refcount.h index 36cb29bc57c2..1f30956e070d 100644 --- a/tools/include/linux/refcount.h +++ b/tools/include/linux/refcount.h @@ -60,6 +60,11 @@ static inline void refcount_set(refcount_t *r, unsigned int n) atomic_set(&r->refs, n); } +static inline void refcount_set_release(refcount_t *r, unsigned int n) +{ + atomic_set(&r->refs, n); +} + static inline unsigned int refcount_read(const refcount_t *r) { return atomic_read(&r->refs); diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index 51b25e9c4ec7..2e63c2e726aa 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h @@ -4,14 +4,35 @@ #include <linux/types.h> #include <linux/gfp.h> +#include <pthread.h> -#define SLAB_PANIC 2 #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define kzalloc_node(size, flags, node) kmalloc(size, flags) +enum _slab_flag_bits { + _SLAB_KMALLOC, + _SLAB_HWCACHE_ALIGN, + _SLAB_PANIC, + _SLAB_TYPESAFE_BY_RCU, + _SLAB_ACCOUNT, + _SLAB_FLAGS_LAST_BIT +}; + +#define __SLAB_FLAG_BIT(nr) ((unsigned int __force)(1U << (nr))) +#define __SLAB_FLAG_UNUSED ((unsigned int __force)(0U)) + +#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) +#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) +#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) +#ifdef CONFIG_MEMCG +# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) +#else +# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED +#endif void *kmalloc(size_t size, gfp_t gfp); void kfree(void *p); +void *kmalloc_array(size_t n, size_t size, gfp_t gfp); bool slab_is_available(void); @@ -22,6 +43,98 @@ enum slab_state { FULL }; +struct kmem_cache { + pthread_mutex_t lock; + unsigned int size; + unsigned int align; + unsigned int sheaf_capacity; + int nr_objs; + void *objs; + void (*ctor)(void *); + bool non_kernel_enabled; + unsigned int non_kernel; + unsigned long nr_allocated; + unsigned long nr_tallocated; + bool exec_callback; + void (*callback)(void *); + void *private; +}; + +struct kmem_cache_args { + /** + * @align: The required alignment for the objects. + * + * %0 means no specific alignment is requested. + */ + unsigned int align; + /** + * @sheaf_capacity: The maximum size of the sheaf. + */ + unsigned int sheaf_capacity; + /** + * @useroffset: Usercopy region offset. + * + * %0 is a valid offset, when @usersize is non-%0 + */ + unsigned int useroffset; + /** + * @usersize: Usercopy region size. + * + * %0 means no usercopy region is specified. + */ + unsigned int usersize; + /** + * @freeptr_offset: Custom offset for the free pointer + * in &SLAB_TYPESAFE_BY_RCU caches + * + * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer + * outside of the object. This might cause the object to grow in size. + * Cache creators that have a reason to avoid this can specify a custom + * free pointer offset in their struct where the free pointer will be + * placed. + * + * Note that placing the free pointer inside the object requires the + * caller to ensure that no fields are invalidated that are required to + * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for + * details). + * + * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset + * is specified, %use_freeptr_offset must be set %true. + * + * Note that @ctor currently isn't supported with custom free pointers + * as a @ctor requires an external free pointer. + */ + unsigned int freeptr_offset; + /** + * @use_freeptr_offset: Whether a @freeptr_offset is used. + */ + bool use_freeptr_offset; + /** + * @ctor: A constructor for the objects. + * + * The constructor is invoked for each object in a newly allocated slab + * page. It is the cache user's responsibility to free object in the + * same state as after calling the constructor, or deal appropriately + * with any differences between a freshly constructed and a reallocated + * object. + * + * %NULL means no constructor. + */ + void (*ctor)(void *); +}; + +struct slab_sheaf { + union { + struct list_head barn_list; + /* only used for prefilled sheafs */ + unsigned int capacity; + }; + struct kmem_cache *cache; + unsigned int size; + int node; /* only used for rcu_sheaf */ + void *objects[]; +}; + static inline void *kzalloc(size_t size, gfp_t gfp) { return kmalloc(size, gfp | __GFP_ZERO); @@ -36,12 +149,66 @@ static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) } void kmem_cache_free(struct kmem_cache *cachep, void *objp); -struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, - unsigned int align, unsigned int flags, - void (*ctor)(void *)); + +struct kmem_cache * +__kmem_cache_create_args(const char *name, unsigned int size, + struct kmem_cache_args *args, unsigned int flags); + +/* If NULL is passed for @args, use this variant with default arguments. */ +static inline struct kmem_cache * +__kmem_cache_default_args(const char *name, unsigned int size, + struct kmem_cache_args *args, unsigned int flags) +{ + struct kmem_cache_args kmem_default_args = {}; + + return __kmem_cache_create_args(name, size, &kmem_default_args, flags); +} + +static inline struct kmem_cache * +__kmem_cache_create(const char *name, unsigned int size, unsigned int align, + unsigned int flags, void (*ctor)(void *)) +{ + struct kmem_cache_args kmem_args = { + .align = align, + .ctor = ctor, + }; + + return __kmem_cache_create_args(name, size, &kmem_args, flags); +} + +#define kmem_cache_create(__name, __object_size, __args, ...) \ + _Generic((__args), \ + struct kmem_cache_args *: __kmem_cache_create_args, \ + void *: __kmem_cache_default_args, \ + default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__) void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list); -int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, +bool kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, void **list); +struct slab_sheaf * +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size); + +void * +kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); + +void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); +int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf **sheafp, unsigned int size); + +static inline unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf) +{ + return sheaf->size; +} + +#define __alloc_objs(KMALLOC, GFP, TYPE, COUNT) \ +({ \ + const size_t __obj_size = size_mul(sizeof(TYPE), COUNT); \ + (TYPE *)KMALLOC(__obj_size, GFP); \ +}) + +#define kzalloc_obj(P, ...) \ + __alloc_objs(kzalloc, default_gfp(__VA_ARGS__), typeof(P), 1) #endif /* _TOOLS_SLAB_H */ diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h index 5a00b8b2cf9f..cfb6ddeb292b 100644 --- a/tools/include/linux/static_call_types.h +++ b/tools/include/linux/static_call_types.h @@ -25,6 +25,8 @@ #define STATIC_CALL_SITE_INIT 2UL /* init section */ #define STATIC_CALL_SITE_FLAGS 3UL +#ifndef __ASSEMBLY__ + /* * The static call site table needs to be created by external tooling (objtool * or a compiler plugin). @@ -100,4 +102,6 @@ struct static_call_key { #endif /* CONFIG_HAVE_STATIC_CALL */ +#endif /* __ASSEMBLY__ */ + #endif /* _STATIC_CALL_TYPES_H */ diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index 8499f509f03e..51ad3cf4fa82 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -44,6 +44,20 @@ static inline bool strstarts(const char *str, const char *prefix) return strncmp(str, prefix, strlen(prefix)) == 0; } +/* + * Checks if a string ends with another. + */ +static inline bool str_ends_with(const char *str, const char *substr) +{ + size_t len = strlen(str); + size_t sublen = strlen(substr); + + if (sublen > len) + return false; + + return !strcmp(str + len - sublen, substr); +} + extern char * __must_check skip_spaces(const char *); extern char *strim(char *); diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index 8519386acd23..d41f8a261bce 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -42,6 +42,8 @@ typedef __s16 s16; typedef __u8 u8; typedef __s8 s8; +typedef unsigned long long ullong; + #ifdef __CHECKER__ #define __bitwise __attribute__((bitwise)) #else @@ -86,6 +88,14 @@ typedef struct { # define __aligned_u64 __u64 __attribute__((aligned(8))) #endif +#ifndef __aligned_be64 +# define __aligned_be64 __be64 __attribute__((aligned(8))) +#endif + +#ifndef __aligned_le64 +# define __aligned_le64 __le64 __attribute__((aligned(8))) +#endif + struct list_head { struct list_head *next, *prev; }; diff --git a/tools/include/linux/unaligned.h b/tools/include/linux/unaligned.h index 395a4464fe73..d51ddafed138 100644 --- a/tools/include/linux/unaligned.h +++ b/tools/include/linux/unaligned.h @@ -6,9 +6,6 @@ * This is the most generic implementation of unaligned accesses * and should work almost anywhere. */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpacked" -#pragma GCC diagnostic ignored "-Wattributes" #include <vdso/unaligned.h> #define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) @@ -143,6 +140,5 @@ static inline u64 get_unaligned_be48(const void *p) { return __get_unaligned_be48(p); } -#pragma GCC diagnostic pop #endif /* __LINUX_UNALIGNED_H */ diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index a1f55fb24bb3..00fd2e566d75 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -17,29 +17,57 @@ endif # it defaults to this nolibc directory. OUTPUT ?= $(CURDIR)/ -ifeq ($(V),1) -Q= -else -Q=@ -endif - -nolibc_arch := $(patsubst arm64,aarch64,$(ARCH)) -arch_file := arch-$(nolibc_arch).h +architectures := arm arm64 loongarch m68k mips openrisc parisc powerpc riscv s390 sh sparc x86 +arch_files := arch.h $(addsuffix .h, $(addprefix arch-, $(architectures))) all_files := \ + alloca.h \ + assert.h \ + byteswap.h \ compiler.h \ crt.h \ ctype.h \ + dirent.h \ + elf.h \ + endian.h \ + err.h \ errno.h \ + fcntl.h \ + getopt.h \ + inttypes.h \ + limits.h \ + math.h \ nolibc.h \ + poll.h \ + sched.h \ signal.h \ stackprotector.h \ std.h \ stdarg.h \ stdbool.h \ + stddef.h \ stdint.h \ stdlib.h \ string.h \ sys.h \ + sys/auxv.h \ + sys/ioctl.h \ + sys/mman.h \ + sys/mount.h \ + sys/prctl.h \ + sys/ptrace.h \ + sys/random.h \ + sys/reboot.h \ + sys/resource.h \ + sys/select.h \ + sys/stat.h \ + sys/syscall.h \ + sys/sysmacros.h \ + sys/time.h \ + sys/timerfd.h \ + sys/types.h \ + sys/uio.h \ + sys/utsname.h \ + sys/wait.h \ time.h \ types.h \ unistd.h \ @@ -55,7 +83,7 @@ help: @echo "Supported targets under nolibc:" @echo " all call \"headers\"" @echo " clean clean the sysroot" - @echo " headers prepare a sysroot in tools/include/nolibc/sysroot" + @echo " headers prepare a multi-arch sysroot in \$${OUTPUT}sysroot" @echo " headers_standalone like \"headers\", and also install kernel headers" @echo " help this help" @echo "" @@ -66,28 +94,22 @@ help: @echo " OUTPUT = $(OUTPUT)" @echo "" -# Note: when ARCH is "x86" we concatenate both x86_64 and i386 +# installs headers for all archs at once. headers: - $(Q)mkdir -p $(OUTPUT)sysroot - $(Q)mkdir -p $(OUTPUT)sysroot/include - $(Q)cp $(all_files) $(OUTPUT)sysroot/include/ - $(Q)if [ "$(ARCH)" = "x86" ]; then \ - sed -e \ - 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \ - arch-x86_64.h; \ - sed -e \ - 's,^#ifndef _NOLIBC_ARCH_I386_H,#if !defined(_NOLIBC_ARCH_I386_H) \&\& !defined(__x86_64__),' \ - arch-i386.h; \ - elif [ -e "$(arch_file)" ]; then \ - cat $(arch_file); \ - else \ - echo "Fatal: architecture $(ARCH) not yet supported by nolibc." >&2; \ - exit 1; \ - fi > $(OUTPUT)sysroot/include/arch.h + $(Q)mkdir -p "$(OUTPUT)sysroot/include" + $(Q)cp --parents $(arch_files) $(all_files) "$(OUTPUT)sysroot/include/" headers_standalone: headers - $(Q)$(MAKE) -C $(srctree) headers $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot +CFLAGS_s390 := -m64 +CFLAGS := $(CFLAGS_$(ARCH)) + +headers_check: headers_standalone + $(Q)for header in $(filter-out crt.h std.h,$(all_files)); do \ + $(CC) $(CFLAGS) $(CLANG_CROSS_FLAGS) -Wall -Werror -nostdinc -fsyntax-only -x c /dev/null \ + -I$(or $(objtree),$(srctree))/usr/include -include $$header -include $$header || exit 1; \ + done + clean: $(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot" diff --git a/tools/include/nolibc/alloca.h b/tools/include/nolibc/alloca.h new file mode 100644 index 000000000000..448233a79e6e --- /dev/null +++ b/tools/include/nolibc/alloca.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * alloca() for NOLIBC + * Copyright (C) 2026 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_ALLOCA_H +#define _NOLIBC_ALLOCA_H + +#define alloca(size) __builtin_alloca(size) + +#endif /* _NOLIBC_ALLOCA_H */ diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h index 6180ff99ab43..8681922e05ca 100644 --- a/tools/include/nolibc/arch-arm.h +++ b/tools/include/nolibc/arch-arm.h @@ -7,8 +7,11 @@ #ifndef _NOLIBC_ARCH_ARM_H #define _NOLIBC_ARCH_ARM_H +#include <linux/unistd.h> + #include "compiler.h" #include "crt.h" +#include "std.h" /* Syscalls for ARM in ARM or Thumb modes : * - registers are 32-bit @@ -50,7 +53,7 @@ #endif /* end THUMB */ -#define my_syscall0(num) \ +#define __nolibc_syscall0(num) \ ({ \ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ register long _arg1 __asm__ ("r0"); \ @@ -67,7 +70,7 @@ _arg1; \ }) -#define my_syscall1(num, arg1) \ +#define __nolibc_syscall1(num, arg1) \ ({ \ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ register long _arg1 __asm__ ("r0") = (long)(arg1); \ @@ -84,7 +87,7 @@ _arg1; \ }) -#define my_syscall2(num, arg1, arg2) \ +#define __nolibc_syscall2(num, arg1, arg2) \ ({ \ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ register long _arg1 __asm__ ("r0") = (long)(arg1); \ @@ -102,7 +105,7 @@ _arg1; \ }) -#define my_syscall3(num, arg1, arg2, arg3) \ +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ ({ \ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ register long _arg1 __asm__ ("r0") = (long)(arg1); \ @@ -121,7 +124,7 @@ _arg1; \ }) -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ ({ \ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ register long _arg1 __asm__ ("r0") = (long)(arg1); \ @@ -141,7 +144,7 @@ _arg1; \ }) -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ register long _arg1 __asm__ ("r0") = (long)(arg1); \ @@ -162,7 +165,7 @@ _arg1; \ }) -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ register long _arg1 __asm__ ("r0") = (long)(arg1); \ @@ -184,16 +187,23 @@ _arg1; \ }) +#ifndef NOLIBC_NO_RUNTIME /* startup code */ -void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector _start(void) { __asm__ volatile ( "mov r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */ - "and ip, r0, #-8\n" /* sp must be 8-byte aligned in the callee */ - "mov sp, ip\n" "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ + +static __attribute__((unused)) +int _sys_ftruncate64(int fd, uint32_t length0, uint32_t length1) +{ + return __nolibc_syscall4(__NR_ftruncate64, fd, 0, length0, length1); +} +#define _sys_ftruncate64 _sys_ftruncate64 #endif /* _NOLIBC_ARCH_ARM_H */ diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-arm64.h index 06fdef7b291a..814bcc13b4b2 100644 --- a/tools/include/nolibc/arch-aarch64.h +++ b/tools/include/nolibc/arch-arm64.h @@ -1,16 +1,16 @@ /* SPDX-License-Identifier: LGPL-2.1 OR MIT */ /* - * AARCH64 specific definitions for NOLIBC + * ARM64 specific definitions for NOLIBC * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ -#ifndef _NOLIBC_ARCH_AARCH64_H -#define _NOLIBC_ARCH_AARCH64_H +#ifndef _NOLIBC_ARCH_ARM64_H +#define _NOLIBC_ARCH_ARM64_H #include "compiler.h" #include "crt.h" -/* Syscalls for AARCH64 : +/* Syscalls for ARM64 : * - registers are 64-bit * - stack is 16-byte aligned * - syscall number is passed in x8 @@ -22,7 +22,7 @@ * don't have to experience issues with register constraints. */ -#define my_syscall0(num) \ +#define __nolibc_syscall0(num) \ ({ \ register long _num __asm__ ("x8") = (num); \ register long _arg1 __asm__ ("x0"); \ @@ -36,7 +36,7 @@ _arg1; \ }) -#define my_syscall1(num, arg1) \ +#define __nolibc_syscall1(num, arg1) \ ({ \ register long _num __asm__ ("x8") = (num); \ register long _arg1 __asm__ ("x0") = (long)(arg1); \ @@ -51,7 +51,7 @@ _arg1; \ }) -#define my_syscall2(num, arg1, arg2) \ +#define __nolibc_syscall2(num, arg1, arg2) \ ({ \ register long _num __asm__ ("x8") = (num); \ register long _arg1 __asm__ ("x0") = (long)(arg1); \ @@ -67,7 +67,7 @@ _arg1; \ }) -#define my_syscall3(num, arg1, arg2, arg3) \ +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ ({ \ register long _num __asm__ ("x8") = (num); \ register long _arg1 __asm__ ("x0") = (long)(arg1); \ @@ -84,7 +84,7 @@ _arg1; \ }) -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ ({ \ register long _num __asm__ ("x8") = (num); \ register long _arg1 __asm__ ("x0") = (long)(arg1); \ @@ -102,7 +102,7 @@ _arg1; \ }) -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ register long _num __asm__ ("x8") = (num); \ register long _arg1 __asm__ ("x0") = (long)(arg1); \ @@ -121,7 +121,7 @@ _arg1; \ }) -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ register long _num __asm__ ("x8") = (num); \ register long _arg1 __asm__ ("x0") = (long)(arg1); \ @@ -141,14 +141,15 @@ _arg1; \ }) +#ifndef NOLIBC_NO_RUNTIME /* startup code */ -void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector _start(void) { __asm__ volatile ( "mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */ - "and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */ "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); } -#endif /* _NOLIBC_ARCH_AARCH64_H */ +#endif /* NOLIBC_NO_RUNTIME */ +#endif /* _NOLIBC_ARCH_ARM64_H */ diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h deleted file mode 100644 index ff5afc35bbd8..000000000000 --- a/tools/include/nolibc/arch-i386.h +++ /dev/null @@ -1,180 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * i386 specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> - */ - -#ifndef _NOLIBC_ARCH_I386_H -#define _NOLIBC_ARCH_I386_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for i386 : - * - mostly similar to x86_64 - * - registers are 32-bit - * - syscall number is passed in eax - * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively - * - all registers are preserved (except eax of course) - * - the system call is performed by calling int $0x80 - * - syscall return comes in eax - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - * - * Also, i386 supports the old_select syscall if newselect is not available - */ -#define __ARCH_WANT_SYS_OLD_SELECT - -#define my_syscall0(num) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - register long _arg4 __asm__ ("esi") = (long)(arg4); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - register long _arg4 __asm__ ("esi") = (long)(arg4); \ - register long _arg5 __asm__ ("edi") = (long)(arg5); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - long _eax = (long)(num); \ - long _arg6 = (long)(arg6); /* Always in memory */ \ - __asm__ volatile ( \ - "pushl %[_arg6]\n\t" \ - "pushl %%ebp\n\t" \ - "movl 4(%%esp),%%ebp\n\t" \ - "int $0x80\n\t" \ - "popl %%ebp\n\t" \ - "addl $4,%%esp\n\t" \ - : "+a"(_eax) /* %eax */ \ - : "b"(arg1), /* %ebx */ \ - "c"(arg2), /* %ecx */ \ - "d"(arg3), /* %edx */ \ - "S"(arg4), /* %esi */ \ - "D"(arg5), /* %edi */ \ - [_arg6]"m"(_arg6) /* memory */ \ - : "memory", "cc" \ - ); \ - _eax; \ -}) - -/* startup code */ -/* - * i386 System V ABI mandates: - * 1) last pushed argument must be 16-byte aligned. - * 2) The deepest stack frame should be set to zero - * - */ -void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) -{ - __asm__ volatile ( - "xor %ebp, %ebp\n" /* zero the stack frame */ - "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ - "add $12, %esp\n" /* avoid over-estimating after the 'and' & 'sub' below */ - "and $-16, %esp\n" /* the %esp must be 16-byte aligned on 'call' */ - "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ - "push %eax\n" /* push arg1 on stack to support plain stack modes too */ - "call _start_c\n" /* transfer to c runtime */ - "hlt\n" /* ensure it does not return */ - ); - __nolibc_entrypoint_epilogue(); -} - -#endif /* _NOLIBC_ARCH_I386_H */ diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h index fb519545959e..3abed96a15e8 100644 --- a/tools/include/nolibc/arch-loongarch.h +++ b/tools/include/nolibc/arch-loongarch.h @@ -24,7 +24,7 @@ #define _NOLIBC_SYSCALL_CLOBBERLIST \ "memory", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8" -#define my_syscall0(num) \ +#define __nolibc_syscall0(num) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0"); \ @@ -38,7 +38,7 @@ _arg1; \ }) -#define my_syscall1(num, arg1) \ +#define __nolibc_syscall1(num, arg1) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0") = (long)(arg1); \ @@ -52,7 +52,7 @@ _arg1; \ }) -#define my_syscall2(num, arg1, arg2) \ +#define __nolibc_syscall2(num, arg1, arg2) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0") = (long)(arg1); \ @@ -68,7 +68,7 @@ _arg1; \ }) -#define my_syscall3(num, arg1, arg2, arg3) \ +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0") = (long)(arg1); \ @@ -85,7 +85,7 @@ _arg1; \ }) -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0") = (long)(arg1); \ @@ -103,7 +103,7 @@ _arg1; \ }) -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0") = (long)(arg1); \ @@ -122,7 +122,7 @@ _arg1; \ }) -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0") = (long)(arg1); \ @@ -142,21 +142,16 @@ _arg1; \ }) -#if __loongarch_grlen == 32 -#define LONG_BSTRINS "bstrins.w" -#else /* __loongarch_grlen == 64 */ -#define LONG_BSTRINS "bstrins.d" -#endif - +#ifndef NOLIBC_NO_RUNTIME /* startup code */ -void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector _start(void) { __asm__ volatile ( "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ - LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned */ "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ #endif /* _NOLIBC_ARCH_LOONGARCH_H */ diff --git a/tools/include/nolibc/arch-m68k.h b/tools/include/nolibc/arch-m68k.h new file mode 100644 index 000000000000..341f434a530c --- /dev/null +++ b/tools/include/nolibc/arch-m68k.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * m68k specific definitions for NOLIBC + * Copyright (C) 2025 Daniel Palmer<daniel@thingy.jp> + * + * Roughly based on one or more of the other arch files. + * + */ + +#ifndef _NOLIBC_ARCH_M68K_H +#define _NOLIBC_ARCH_M68K_H + +#include "compiler.h" +#include "crt.h" + +#define _NOLIBC_SYSCALL_CLOBBERLIST "memory" + +#define __nolibc_syscall0(num) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define __nolibc_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define __nolibc_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + register long _arg5 __asm__ ("d5") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + register long _arg5 __asm__ ("d5") = (long)(arg5); \ + register long _arg6 __asm__ ("a0") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#ifndef NOLIBC_NO_RUNTIME +void _start(void); +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector _start(void) +{ + __asm__ volatile ( + "movel %sp, %sp@-\n" + "jsr _start_c\n" + ); + __nolibc_entrypoint_epilogue(); +} +#endif /* NOLIBC_NO_RUNTIME */ + +#endif /* _NOLIBC_ARCH_M68K_H */ diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 1791a8ce58da..26ad413cec62 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -7,10 +7,13 @@ #ifndef _NOLIBC_ARCH_MIPS_H #define _NOLIBC_ARCH_MIPS_H +#include <linux/unistd.h> + #include "compiler.h" #include "crt.h" +#include "std.h" -#if !defined(_ABIO32) +#if !defined(_ABIO32) && !defined(_ABIN32) && !defined(_ABI64) #error Unsupported MIPS ABI #endif @@ -32,21 +35,55 @@ * - the arguments are cast to long and assigned into the target registers * which are then simply passed as registers to the asm code, so that we * don't have to experience issues with register constraints. + * + * Syscalls for MIPS ABI N32, same as ABI O32 with the following differences : + * - arguments are in a0, a1, a2, a3, t0, t1, t2, t3. + * t0..t3 are also known as a4..a7. + * - stack is 16-byte aligned */ +#if !defined(__mips_isa_rev) || __mips_isa_rev < 6 +#define _NOLIBC_SYSCALL_CLOBBER_HI_LO "hi", "lo" +#else +#define _NOLIBC_SYSCALL_CLOBBER_HI_LO "$0" +#endif + +#if defined(_ABIO32) + #define _NOLIBC_SYSCALL_CLOBBERLIST \ - "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" + "memory", "cc", "at", "v1", \ + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", \ + _NOLIBC_SYSCALL_CLOBBER_HI_LO + +#define _NOLIBC_SYSCALL_STACK_RESERVE "addiu $sp, $sp, -32\n" +#define _NOLIBC_SYSCALL_STACK_UNRESERVE "addiu $sp, $sp, 32\n" + +#define _NOLIBC_SYSCALL_REG register long + +#else /* _ABIN32 || _ABI64 */ + +/* binutils, GCC and clang disagree about register aliases, use numbers instead. */ +#define _NOLIBC_SYSCALL_CLOBBERLIST \ + "memory", "cc", "at", "v1", \ + "10", "11", "12", "13", "14", "15", "24", "25", \ + _NOLIBC_SYSCALL_CLOBBER_HI_LO + +#define _NOLIBC_SYSCALL_STACK_RESERVE +#define _NOLIBC_SYSCALL_STACK_UNRESERVE + +#define _NOLIBC_SYSCALL_REG register long long -#define my_syscall0(num) \ +#endif /* _ABIO32 */ + +#define __nolibc_syscall0(num) \ ({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg4 __asm__ ("a3"); \ + _NOLIBC_SYSCALL_REG _num __asm__ ("v0") = (num); \ + _NOLIBC_SYSCALL_REG _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "r"(_num) \ : _NOLIBC_SYSCALL_CLOBBERLIST \ @@ -54,16 +91,16 @@ _arg4 ? -_num : _num; \ }) -#define my_syscall1(num, arg1) \ +#define __nolibc_syscall1(num, arg1) \ ({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg4 __asm__ ("a3"); \ + _NOLIBC_SYSCALL_REG _num __asm__ ("v0") = (num); \ + _NOLIBC_SYSCALL_REG _arg1 __asm__ ("a0") = __nolibc_arg_to_reg(arg1); \ + _NOLIBC_SYSCALL_REG _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1) \ @@ -72,17 +109,17 @@ _arg4 ? -_num : _num; \ }) -#define my_syscall2(num, arg1, arg2) \ +#define __nolibc_syscall2(num, arg1, arg2) \ ({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg4 __asm__ ("a3"); \ + _NOLIBC_SYSCALL_REG _num __asm__ ("v0") = (num); \ + _NOLIBC_SYSCALL_REG _arg1 __asm__ ("a0") = __nolibc_arg_to_reg(arg1); \ + _NOLIBC_SYSCALL_REG _arg2 __asm__ ("a1") = __nolibc_arg_to_reg(arg2); \ + _NOLIBC_SYSCALL_REG _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2) \ @@ -91,18 +128,18 @@ _arg4 ? -_num : _num; \ }) -#define my_syscall3(num, arg1, arg2, arg3) \ +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ ({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3"); \ + _NOLIBC_SYSCALL_REG _num __asm__ ("v0") = (num); \ + _NOLIBC_SYSCALL_REG _arg1 __asm__ ("a0") = __nolibc_arg_to_reg(arg1); \ + _NOLIBC_SYSCALL_REG _arg2 __asm__ ("a1") = __nolibc_arg_to_reg(arg2); \ + _NOLIBC_SYSCALL_REG _arg3 __asm__ ("a2") = __nolibc_arg_to_reg(arg3); \ + _NOLIBC_SYSCALL_REG _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3) \ @@ -111,18 +148,18 @@ _arg4 ? -_num : _num; \ }) -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ ({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3") = (long)(arg4); \ + _NOLIBC_SYSCALL_REG _num __asm__ ("v0") = (num); \ + _NOLIBC_SYSCALL_REG _arg1 __asm__ ("a0") = __nolibc_arg_to_reg(arg1); \ + _NOLIBC_SYSCALL_REG _arg2 __asm__ ("a1") = __nolibc_arg_to_reg(arg2); \ + _NOLIBC_SYSCALL_REG _arg3 __asm__ ("a2") = __nolibc_arg_to_reg(arg3); \ + _NOLIBC_SYSCALL_REG _arg4 __asm__ ("a3") = __nolibc_arg_to_reg(arg4); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ @@ -131,20 +168,22 @@ _arg4 ? -_num : _num; \ }) -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +#if defined(_ABIO32) + +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3") = (long)(arg4); \ - register long _arg5 = (long)(arg5); \ + _NOLIBC_SYSCALL_REG _num __asm__ ("v0") = (num); \ + _NOLIBC_SYSCALL_REG _arg1 __asm__ ("a0") = __nolibc_arg_to_reg(arg1); \ + _NOLIBC_SYSCALL_REG _arg2 __asm__ ("a1") = __nolibc_arg_to_reg(arg2); \ + _NOLIBC_SYSCALL_REG _arg3 __asm__ ("a2") = __nolibc_arg_to_reg(arg3); \ + _NOLIBC_SYSCALL_REG _arg4 __asm__ ("a3") = __nolibc_arg_to_reg(arg4); \ + _NOLIBC_SYSCALL_REG _arg5 = __nolibc_arg_to_reg(arg5); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ @@ -153,22 +192,64 @@ _arg4 ? -_num : _num; \ }) -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3") = (long)(arg4); \ - register long _arg5 = (long)(arg5); \ - register long _arg6 = (long)(arg6); \ + _NOLIBC_SYSCALL_REG _num __asm__ ("v0") = (num); \ + _NOLIBC_SYSCALL_REG _arg1 __asm__ ("a0") = __nolibc_arg_to_reg(arg1); \ + _NOLIBC_SYSCALL_REG _arg2 __asm__ ("a1") = __nolibc_arg_to_reg(arg2); \ + _NOLIBC_SYSCALL_REG _arg3 __asm__ ("a2") = __nolibc_arg_to_reg(arg3); \ + _NOLIBC_SYSCALL_REG _arg4 __asm__ ("a3") = __nolibc_arg_to_reg(arg4); \ + _NOLIBC_SYSCALL_REG _arg5 = __nolibc_arg_to_reg(arg5); \ + _NOLIBC_SYSCALL_REG _arg6 = __nolibc_arg_to_reg(arg6); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "sw %8, 20($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#else /* _ABIN32 || _ABI64 */ + +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + _NOLIBC_SYSCALL_REG _num __asm__ ("v0") = (num); \ + _NOLIBC_SYSCALL_REG _arg1 __asm__ ("$4") = __nolibc_arg_to_reg(arg1); \ + _NOLIBC_SYSCALL_REG _arg2 __asm__ ("$5") = __nolibc_arg_to_reg(arg2); \ + _NOLIBC_SYSCALL_REG _arg3 __asm__ ("$6") = __nolibc_arg_to_reg(arg3); \ + _NOLIBC_SYSCALL_REG _arg4 __asm__ ("$7") = __nolibc_arg_to_reg(arg4); \ + _NOLIBC_SYSCALL_REG _arg5 __asm__ ("$8") = __nolibc_arg_to_reg(arg5); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + _NOLIBC_SYSCALL_REG _num __asm__ ("v0") = (num); \ + _NOLIBC_SYSCALL_REG _arg1 __asm__ ("$4") = __nolibc_arg_to_reg(arg1); \ + _NOLIBC_SYSCALL_REG _arg2 __asm__ ("$5") = __nolibc_arg_to_reg(arg2); \ + _NOLIBC_SYSCALL_REG _arg3 __asm__ ("$6") = __nolibc_arg_to_reg(arg3); \ + _NOLIBC_SYSCALL_REG _arg4 __asm__ ("$7") = __nolibc_arg_to_reg(arg4); \ + _NOLIBC_SYSCALL_REG _arg5 __asm__ ("$8") = __nolibc_arg_to_reg(arg5); \ + _NOLIBC_SYSCALL_REG _arg6 __asm__ ("$9") = __nolibc_arg_to_reg(arg6); \ + \ + __asm__ volatile ( \ + "syscall\n" \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ @@ -178,29 +259,39 @@ _arg4 ? -_num : _num; \ }) +#endif /* _ABIO32 */ + +#ifndef NOLIBC_NO_RUNTIME /* startup code, note that it's called __start on MIPS */ -void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void) +void __start(void); +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector __start(void) { __asm__ volatile ( - ".set push\n" - ".set noreorder\n" - "bal 1f\n" /* prime $ra for .cpload */ - "nop\n" - "1:\n" - ".cpload $ra\n" "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ - "addiu $sp, $sp, -4\n" /* space for .cprestore to store $gp */ - ".cprestore 0\n" - "li $t0, -8\n" - "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ +#if defined(_ABIO32) "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ +#endif /* _ABIO32 */ "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ "ori $t9, %lo(_start_c)\n" +#if defined(_ABI64) + "lui $t0, %highest(_start_c)\n" + "ori $t0, %higher(_start_c)\n" + "dsll $t0, 0x20\n" + "or $t9, $t0\n" +#endif /* _ABI64 */ "jalr $t9\n" /* transfer to c runtime */ - " nop\n" /* delayed slot */ - ".set pop\n" ); __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ + +#if defined(_ABIO32) +static __attribute__((unused)) +int _sys_ftruncate64(int fd, uint32_t length0, uint32_t length1) +{ + return __nolibc_syscall4(__NR_ftruncate64, fd, 0, length0, length1); +} +#define _sys_ftruncate64 _sys_ftruncate64 +#endif #endif /* _NOLIBC_ARCH_MIPS_H */ diff --git a/tools/include/nolibc/arch-openrisc.h b/tools/include/nolibc/arch-openrisc.h new file mode 100644 index 000000000000..5ef82fd9cc64 --- /dev/null +++ b/tools/include/nolibc/arch-openrisc.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * OpenRISC specific definitions for NOLIBC + * Copyright (C) 2026 Thomas Weißschuh <linux@weissschuh.net> + */ + +#ifndef _NOLIBC_ARCH_OPENRISC_H +#define _NOLIBC_ARCH_OPENRISC_H + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for OpenRISC: + * - syscall number is passed in r11 + * - arguments are in r3, r4, r5, r6, r7, r8 + * - the system call is performed by calling l.sys 1 + * - syscall return value is in r11 + */ + +#define _NOLIBC_SYSCALL_CLOBBERLIST \ + "r12", "r13", "r15", "r17", "r19", "r21", "r23", "r25", "r27", "r29", "r31", "memory" + +#define __nolibc_syscall0(num) \ +({ \ + register long _num __asm__ ("r11") = (num); \ + \ + __asm__ volatile ( \ + "l.sys 1\n" \ + : "+r"(_num) \ + : \ + : "r3", "r4", "r5", "r6", "r7", "r8", \ + _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define __nolibc_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("r11") = (num); \ + register long _arg1 __asm__ ("r3") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "l.sys 1\n" \ + : "+r"(_num) \ + : "r"(_arg1) \ + : "r4", "r5", "r6", "r7", "r8", _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define __nolibc_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("r11") = (num); \ + register long _arg1 __asm__ ("r3") = (long)(arg1); \ + register long _arg2 __asm__ ("r4") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "l.sys 1\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2) \ + : "r5", "r6", "r7", "r8", _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("r11") = (num); \ + register long _arg1 __asm__ ("r3") = (long)(arg1); \ + register long _arg2 __asm__ ("r4") = (long)(arg2); \ + register long _arg3 __asm__ ("r5") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "l.sys 1\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : "r6", "r7", "r8", _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("r11") = (num); \ + register long _arg1 __asm__ ("r3") = (long)(arg1); \ + register long _arg2 __asm__ ("r4") = (long)(arg2); \ + register long _arg3 __asm__ ("r5") = (long)(arg3); \ + register long _arg4 __asm__ ("r6") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "l.sys 1\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : "r7", "r8", _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("r11") = (num); \ + register long _arg1 __asm__ ("r3") = (long)(arg1); \ + register long _arg2 __asm__ ("r4") = (long)(arg2); \ + register long _arg3 __asm__ ("r5") = (long)(arg3); \ + register long _arg4 __asm__ ("r6") = (long)(arg4); \ + register long _arg5 __asm__ ("r7") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "l.sys 1\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : "r8", _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("r11") = (num); \ + register long _arg1 __asm__ ("r3") = (long)(arg1); \ + register long _arg2 __asm__ ("r4") = (long)(arg2); \ + register long _arg3 __asm__ ("r5") = (long)(arg3); \ + register long _arg4 __asm__ ("r6") = (long)(arg4); \ + register long _arg5 __asm__ ("r7") = (long)(arg5); \ + register long _arg6 __asm__ ("r8") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "l.sys 1\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#ifndef NOLIBC_NO_RUNTIME +/* startup code */ +void _start_wrapper(void); +void __attribute__((weak,noreturn)) +__nolibc_entrypoint __nolibc_no_stack_protector +_start_wrapper(void) +{ + __asm__ volatile ( + ".global _start\n" /* The C function will have a prologue, */ + ".type _start, @function\n" /* corrupting "sp/r1" */ + ".weak _start\n" + "_start:\n" + + "l.jal _start_c\n" /* transfer to c runtime */ + "l.or r3,r1,r1\n" /* save stack pointer to r3, as arg1 of _start_c */ + + ".size _start, .-_start\n" + ); + __nolibc_entrypoint_epilogue(); +} +#endif /* NOLIBC_NO_RUNTIME */ + +#endif /* _NOLIBC_ARCH_OPENRISC_H */ diff --git a/tools/include/nolibc/arch-parisc.h b/tools/include/nolibc/arch-parisc.h new file mode 100644 index 000000000000..417043ecbc53 --- /dev/null +++ b/tools/include/nolibc/arch-parisc.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * parisc/hppa (32-bit) specific definitions for NOLIBC + * Copyright (C) 2026 Thomas Weißschuh <linux@weissschuh.net> + */ + +#ifndef _NOLIBC_ARCH_PARISC_H +#define _NOLIBC_ARCH_PARISC_H + +#if defined(__LP64__) +#error 64-bit not supported +#endif + +#include "compiler.h" +#include "crt.h" + +/* Syscalls for parisc : + * - syscall number is passed in r20 + * - arguments are in r26 to r21 + * - the system call is performed by calling "ble 0x100(%sr2, %r0)", + * the instruction after that is in the delay slot and executed before + * the jump to 0x100 actually happens, use it to load the syscall number + * - syscall return comes in r28 + * - the arguments are cast to long and assigned into the target + * registers which are then simply passed as registers to the asm code, + * so that we don't have to experience issues with register constraints. + */ + +#define _NOLIBC_SYSCALL_CLOBBERLIST \ + "memory", "%r1", "%r2", "%r4", "%r20", "%r29", "%r31" + +#define __nolibc_syscall0(num) \ +({ \ + register long _ret __asm__ ("r28"); \ + \ + __asm__ volatile ( \ + "ble 0x100(%%sr2, %%r0)\n\t" \ + "copy %1, %%r20\n\t" \ + : "=r"(_ret) \ + : "r"(num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST, \ + "%r21", "%r22", "%r23", "%r24", "%r25", "%r26" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall1(num, arg1) \ +({ \ + register long _ret __asm__ ("r28"); \ + register long _arg1 __asm__ ("r26") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "ble 0x100(%%sr2, %%r0)\n\t" \ + "copy %2, %%r20\n\t" \ + : "=r"(_ret), \ + "+r"(_arg1) \ + : "r"(num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST, \ + "%r21", "%r22", "%r23", "%r24", "%r25" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall2(num, arg1, arg2) \ +({ \ + register long _ret __asm__ ("r28"); \ + register long _arg1 __asm__ ("r26") = (long)(arg1); \ + register long _arg2 __asm__ ("r25") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "ble 0x100(%%sr2, %%r0)\n\t" \ + "copy %3, %%r20\n\t" \ + : "=r"(_ret), \ + "+r"(_arg1), "+r"(_arg2) \ + : "r"(num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST, \ + "%r21", "%r22", "%r23", "%r24" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _ret __asm__ ("r28"); \ + register long _arg1 __asm__ ("r26") = (long)(arg1); \ + register long _arg2 __asm__ ("r25") = (long)(arg2); \ + register long _arg3 __asm__ ("r24") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "ble 0x100(%%sr2, %%r0)\n\t" \ + "copy %4, %%r20\n\t" \ + : "=r"(_ret), \ + "+r"(_arg1), "+r"(_arg2), "+r"(_arg3) \ + : "r"(num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST, \ + "%r21", "%r22", "%r23" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _ret __asm__ ("r28"); \ + register long _arg1 __asm__ ("r26") = (long)(arg1); \ + register long _arg2 __asm__ ("r25") = (long)(arg2); \ + register long _arg3 __asm__ ("r24") = (long)(arg3); \ + register long _arg4 __asm__ ("r23") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "ble 0x100(%%sr2, %%r0)\n\t" \ + "copy %5, %%r20\n\t" \ + : "=r"(_ret), \ + "+r"(_arg1), "+r"(_arg2), "+r"(_arg3), "+r"(_arg4) \ + : "r"(num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST, \ + "%r21", "%r22" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _ret __asm__ ("r28"); \ + register long _arg1 __asm__ ("r26") = (long)(arg1); \ + register long _arg2 __asm__ ("r25") = (long)(arg2); \ + register long _arg3 __asm__ ("r24") = (long)(arg3); \ + register long _arg4 __asm__ ("r23") = (long)(arg4); \ + register long _arg5 __asm__ ("r22") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "ble 0x100(%%sr2, %%r0)\n\t" \ + "copy %6, %%r20\n\t" \ + : "=r"(_ret), \ + "+r"(_arg1), "+r"(_arg2), "+r"(_arg3), "+r"(_arg4), \ + "+r"(_arg5) \ + : "r"(num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST, \ + "%r21" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _ret __asm__ ("r28"); \ + register long _arg1 __asm__ ("r26") = (long)(arg1); \ + register long _arg2 __asm__ ("r25") = (long)(arg2); \ + register long _arg3 __asm__ ("r24") = (long)(arg3); \ + register long _arg4 __asm__ ("r23") = (long)(arg4); \ + register long _arg5 __asm__ ("r22") = (long)(arg5); \ + register long _arg6 __asm__ ("r21") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "ble 0x100(%%sr2, %%r0)\n\t" \ + "copy %7, %%r20\n\t" \ + : "=r"(_ret), \ + "+r"(_arg1), "+r"(_arg2), "+r"(_arg3), "+r"(_arg4), \ + "+r"(_arg5), "+r"(_arg6) \ + : "r"(num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _ret; \ +}) + +#ifndef NOLIBC_NO_RUNTIME +/* startup code */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector _start(void) +{ + __asm__ volatile ( + ".import $global$\n" /* Set up the dp register */ + "ldil L%$global$, %dp\n" + "ldo R%$global$(%r27), %dp\n" + + "b _start_c\n" /* Call _start_c, the load below is executed first */ + + "ldo -4(%r24), %r26\n" /* The sp register is special on parisc. + * r24 points to argv. Subtract 4 to get &argc. + * Pass that as first argument to _start_c. + */ + ); + __nolibc_entrypoint_epilogue(); +} +#endif /* NOLIBC_NO_RUNTIME */ + +#endif /* _NOLIBC_ARCH_PARISC_H */ diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h index ee2fdb8d601d..a1ab91d55384 100644 --- a/tools/include/nolibc/arch-powerpc.h +++ b/tools/include/nolibc/arch-powerpc.h @@ -7,8 +7,11 @@ #ifndef _NOLIBC_ARCH_POWERPC_H #define _NOLIBC_ARCH_POWERPC_H +#include <linux/unistd.h> + #include "compiler.h" #include "crt.h" +#include "std.h" /* Syscalls for PowerPC : * - stack is 16-byte aligned @@ -25,7 +28,7 @@ #define _NOLIBC_SYSCALL_CLOBBERLIST \ "memory", "cr0", "r12", "r11", "r10", "r9" -#define my_syscall0(num) \ +#define __nolibc_syscall0(num) \ ({ \ register long _ret __asm__ ("r3"); \ register long _num __asm__ ("r0") = (num); \ @@ -42,7 +45,7 @@ _ret; \ }) -#define my_syscall1(num, arg1) \ +#define __nolibc_syscall1(num, arg1) \ ({ \ register long _ret __asm__ ("r3"); \ register long _num __asm__ ("r0") = (num); \ @@ -61,7 +64,7 @@ }) -#define my_syscall2(num, arg1, arg2) \ +#define __nolibc_syscall2(num, arg1, arg2) \ ({ \ register long _ret __asm__ ("r3"); \ register long _num __asm__ ("r0") = (num); \ @@ -81,7 +84,7 @@ }) -#define my_syscall3(num, arg1, arg2, arg3) \ +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ ({ \ register long _ret __asm__ ("r3"); \ register long _num __asm__ ("r0") = (num); \ @@ -102,7 +105,7 @@ }) -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ ({ \ register long _ret __asm__ ("r3"); \ register long _num __asm__ ("r0") = (num); \ @@ -125,7 +128,7 @@ }) -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ register long _ret __asm__ ("r3"); \ register long _num __asm__ ("r0") = (num); \ @@ -148,7 +151,7 @@ _ret; \ }) -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ register long _ret __asm__ ("r3"); \ register long _num __asm__ ("r0") = (num); \ @@ -177,14 +180,15 @@ * "omit-frame-pointer" fails with __attribute__((no_stack_protector)) but * works with __attribute__((__optimize__("-fno-stack-protector"))) */ -#ifdef __no_stack_protector -#undef __no_stack_protector -#define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) +#ifdef __nolibc_no_stack_protector +#undef __nolibc_no_stack_protector +#define __nolibc_no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) #endif #endif /* !__powerpc64__ */ +#ifndef NOLIBC_NO_RUNTIME /* startup code */ -void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector _start(void) { #ifdef __powerpc64__ #if _CALL_ELF == 2 @@ -201,7 +205,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s __asm__ volatile ( "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ - "clrrdi 1, 1, 4\n" /* align the stack to 16 bytes */ "li 0, 0\n" /* zero the frame pointer */ "stdu 1, -32(1)\n" /* the initial stack frame */ "bl _start_c\n" /* transfer to c runtime */ @@ -209,7 +212,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s #else __asm__ volatile ( "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ - "clrrwi 1, 1, 4\n" /* align the stack to 16 bytes */ "li 0, 0\n" /* zero the frame pointer */ "stwu 1, -16(1)\n" /* the initial stack frame */ "bl _start_c\n" /* transfer to c runtime */ @@ -217,5 +219,15 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s #endif __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ + +#if !defined(__powerpc64__) +static __attribute__((unused)) +int _sys_ftruncate64(int fd, uint32_t length0, uint32_t length1) +{ + return __nolibc_syscall4(__NR_ftruncate64, fd, 0, length0, length1); +} +#define _sys_ftruncate64 _sys_ftruncate64 +#endif #endif /* _NOLIBC_ARCH_POWERPC_H */ diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index 8827bf936212..1e84ed2e63b3 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -21,7 +21,7 @@ * so that we don't have to experience issues with register constraints. */ -#define my_syscall0(num) \ +#define __nolibc_syscall0(num) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0"); \ @@ -35,7 +35,7 @@ _arg1; \ }) -#define my_syscall1(num, arg1) \ +#define __nolibc_syscall1(num, arg1) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0") = (long)(arg1); \ @@ -49,7 +49,7 @@ _arg1; \ }) -#define my_syscall2(num, arg1, arg2) \ +#define __nolibc_syscall2(num, arg1, arg2) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0") = (long)(arg1); \ @@ -65,7 +65,7 @@ _arg1; \ }) -#define my_syscall3(num, arg1, arg2, arg3) \ +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0") = (long)(arg1); \ @@ -82,7 +82,7 @@ _arg1; \ }) -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0") = (long)(arg1); \ @@ -100,7 +100,7 @@ _arg1; \ }) -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0") = (long)(arg1); \ @@ -119,7 +119,7 @@ _arg1; \ }) -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ register long _num __asm__ ("a7") = (num); \ register long _arg1 __asm__ ("a0") = (long)(arg1); \ @@ -139,8 +139,9 @@ _arg1; \ }) +#ifndef NOLIBC_NO_RUNTIME /* startup code */ -void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector _start(void) { __asm__ volatile ( ".option push\n" @@ -148,10 +149,10 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s "lla gp, __global_pointer$\n" ".option pop\n" "mv a0, sp\n" /* save stack pointer to a0, as arg1 of _start_c */ - "andi sp, a0, -16\n" /* sp must be 16-byte aligned */ "call _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ #endif /* _NOLIBC_ARCH_RISCV_H */ diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h index f9ab83a219b8..3f05c01aecc6 100644 --- a/tools/include/nolibc/arch-s390.h +++ b/tools/include/nolibc/arch-s390.h @@ -5,8 +5,12 @@ #ifndef _NOLIBC_ARCH_S390_H #define _NOLIBC_ARCH_S390_H -#include <asm/signal.h> -#include <asm/unistd.h> + +#include "types.h" + +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/unistd.h> #include "compiler.h" #include "crt.h" @@ -24,7 +28,7 @@ * */ -#define my_syscall0(num) \ +#define __nolibc_syscall0(num) \ ({ \ register long _num __asm__ ("1") = (num); \ register long _rc __asm__ ("2"); \ @@ -38,7 +42,7 @@ _rc; \ }) -#define my_syscall1(num, arg1) \ +#define __nolibc_syscall1(num, arg1) \ ({ \ register long _num __asm__ ("1") = (num); \ register long _arg1 __asm__ ("2") = (long)(arg1); \ @@ -52,7 +56,7 @@ _arg1; \ }) -#define my_syscall2(num, arg1, arg2) \ +#define __nolibc_syscall2(num, arg1, arg2) \ ({ \ register long _num __asm__ ("1") = (num); \ register long _arg1 __asm__ ("2") = (long)(arg1); \ @@ -67,7 +71,7 @@ _arg1; \ }) -#define my_syscall3(num, arg1, arg2, arg3) \ +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ ({ \ register long _num __asm__ ("1") = (num); \ register long _arg1 __asm__ ("2") = (long)(arg1); \ @@ -83,7 +87,7 @@ _arg1; \ }) -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ ({ \ register long _num __asm__ ("1") = (num); \ register long _arg1 __asm__ ("2") = (long)(arg1); \ @@ -100,7 +104,7 @@ _arg1; \ }) -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ register long _num __asm__ ("1") = (num); \ register long _arg1 __asm__ ("2") = (long)(arg1); \ @@ -119,7 +123,7 @@ _arg1; \ }) -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ register long _num __asm__ ("1") = (num); \ register long _arg1 __asm__ ("2") = (long)(arg1); \ @@ -139,8 +143,9 @@ _arg1; \ }) +#ifndef NOLIBC_NO_RUNTIME /* startup code */ -void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector _start(void) { __asm__ volatile ( "lgr %r2, %r15\n" /* save stack pointer to %r2, as arg1 of _start_c */ @@ -150,6 +155,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s ); __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ struct s390_mmap_arg_struct { unsigned long addr; @@ -161,8 +167,8 @@ struct s390_mmap_arg_struct { }; static __attribute__((unused)) -void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, - off_t offset) +void *_sys_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset) { struct s390_mmap_arg_struct args = { .addr = (unsigned long)addr, @@ -173,15 +179,22 @@ void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, .offset = (unsigned long)offset }; - return (void *)my_syscall1(__NR_mmap, &args); + return (void *)__nolibc_syscall1(__NR_mmap, &args); +} +#define _sys_mmap _sys_mmap + +static __attribute__((unused)) +pid_t _sys_fork(void) +{ + return __nolibc_syscall5(__NR_clone, 0, SIGCHLD, 0, 0, 0); } -#define sys_mmap sys_mmap +#define _sys_fork _sys_fork static __attribute__((unused)) -pid_t sys_fork(void) +pid_t _sys_vfork(void) { - return my_syscall5(__NR_clone, 0, SIGCHLD, 0, 0, 0); + return __nolibc_syscall5(__NR_clone, 0, CLONE_VM | CLONE_VFORK | SIGCHLD, 0, 0, 0); } -#define sys_fork sys_fork +#define _sys_vfork _sys_vfork #endif /* _NOLIBC_ARCH_S390_H */ diff --git a/tools/include/nolibc/arch-sh.h b/tools/include/nolibc/arch-sh.h new file mode 100644 index 000000000000..a32378fd621f --- /dev/null +++ b/tools/include/nolibc/arch-sh.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * SuperH specific definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +#ifndef _NOLIBC_ARCH_SH_H +#define _NOLIBC_ARCH_SH_H + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for SuperH: + * - registers are 32bit wide + * - syscall number is passed in r3 + * - arguments are in r4, r5, r6, r7, r0, r1, r2 + * - the system call is performed by calling trapa #31 + * - syscall return value is in r0 + */ + +#define __nolibc_syscall0(num) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + register long _arg5 __asm__ ("r0") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_arg5) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + register long _arg5 __asm__ ("r0") = (long)(arg5); \ + register long _arg6 __asm__ ("r1") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_arg6) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#ifndef NOLIBC_NO_RUNTIME +/* startup code */ +void _start_wrapper(void); +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector _start_wrapper(void) +{ + __asm__ volatile ( + ".global _start\n" /* The C function will have a prologue, */ + ".type _start, @function\n" /* corrupting "sp" */ + ".weak _start\n" + "_start:\n" + + "mov sp, r4\n" /* save argc pointer to r4, as arg1 of _start_c */ + "bsr _start_c\n" /* transfer to c runtime */ + "nop\n" /* delay slot */ + + ".size _start, .-_start\n" + ); + __nolibc_entrypoint_epilogue(); +} +#endif /* NOLIBC_NO_RUNTIME */ + +#endif /* _NOLIBC_ARCH_SH_H */ diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h new file mode 100644 index 000000000000..ddae9bc10dfe --- /dev/null +++ b/tools/include/nolibc/arch-sparc.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * SPARC (32bit and 64bit) specific definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +#ifndef _NOLIBC_ARCH_SPARC_H +#define _NOLIBC_ARCH_SPARC_H + +#include <linux/unistd.h> + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for SPARC: + * - registers are native word size + * - syscall number is passed in g1 + * - arguments are in o0-o5 + * - the system call is performed by calling a trap instruction + * - syscall return value is in o0 + * - syscall error flag is in the carry bit of the processor status register + */ + +#ifdef __arch64__ + +#define _NOLIBC_SYSCALL "t 0x6d\n" \ + "bcs,a %%xcc, 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#else + +#define _NOLIBC_SYSCALL "t 0x10\n" \ + "bcs,a 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#endif /* __arch64__ */ + +#define __nolibc_syscall0(num) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0"); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define __nolibc_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define __nolibc_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + register long _arg6 __asm__ ("o5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#ifndef NOLIBC_NO_RUNTIME +/* startup code */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector _start(void) +{ + __asm__ volatile ( + /* + * Save argc pointer to o0, as arg1 of _start_c. + * Account for the window save area, which is 16 registers wide. + */ +#ifdef __arch64__ + "add %sp, 128 + 2047, %o0\n" /* on sparc64 / v9 the stack is offset by 2047 */ +#else + "add %sp, 64, %o0\n" +#endif + "b,a _start_c\n" /* transfer to c runtime */ + ); + __nolibc_entrypoint_epilogue(); +} +#endif /* NOLIBC_NO_RUNTIME */ + +static pid_t getpid(void); + +static __attribute__((unused)) +pid_t _sys_fork(void) +{ + pid_t parent, ret; + + parent = getpid(); + ret = __nolibc_syscall0(__NR_fork); + + /* The syscall returns the parent pid in the child instead of 0 */ + if (ret == parent) + return 0; + else + return ret; +} +#define _sys_fork _sys_fork + +static __attribute__((unused)) +pid_t _sys_vfork(void) +{ + pid_t parent, ret; + + parent = getpid(); + ret = __nolibc_syscall0(__NR_vfork); + + /* The syscall returns the parent pid in the child instead of 0 */ + if (ret == parent) + return 0; + else + return ret; +} +#define _sys_vfork _sys_vfork + +#endif /* _NOLIBC_ARCH_SPARC_H */ diff --git a/tools/include/nolibc/arch-x86.h b/tools/include/nolibc/arch-x86.h new file mode 100644 index 000000000000..fe152ac2650b --- /dev/null +++ b/tools/include/nolibc/arch-x86.h @@ -0,0 +1,393 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * x86 specific definitions for NOLIBC (both 32- and 64-bit) + * Copyright (C) 2017-2025 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_X86_H +#define _NOLIBC_ARCH_X86_H + +#include "compiler.h" +#include "crt.h" + +#if !defined(__x86_64__) + +/* Syscalls for i386 : + * - mostly similar to x86_64 + * - registers are 32-bit + * - syscall number is passed in eax + * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively + * - all registers are preserved (except eax of course) + * - the system call is performed by calling int $0x80 + * - syscall return comes in eax + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, i386 supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define __nolibc_syscall0(num) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + register long _arg4 __asm__ ("esi") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + register long _arg4 __asm__ ("esi") = (long)(arg4); \ + register long _arg5 __asm__ ("edi") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long _eax = (long)(num); \ + long _arg6 = (long)(arg6); /* Always in memory */ \ + __asm__ volatile ( \ + "pushl %[_arg6]\n\t" \ + "pushl %%ebp\n\t" \ + "movl 4(%%esp),%%ebp\n\t" \ + "int $0x80\n\t" \ + "popl %%ebp\n\t" \ + "addl $4,%%esp\n\t" \ + : "+a"(_eax) /* %eax */ \ + : "b"(arg1), /* %ebx */ \ + "c"(arg2), /* %ecx */ \ + "d"(arg3), /* %edx */ \ + "S"(arg4), /* %esi */ \ + "D"(arg5), /* %edi */ \ + [_arg6]"m"(_arg6) /* memory */ \ + : "memory", "cc" \ + ); \ + _eax; \ +}) + +#ifndef NOLIBC_NO_RUNTIME +/* startup code */ +/* + * i386 System V ABI mandates: + * 1) last pushed argument must be 16-byte aligned. + * 2) The deepest stack frame should be set to zero + * + */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector _start(void) +{ + __asm__ volatile ( + "xor %ebp, %ebp\n" /* zero the stack frame */ + "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ + "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ + "push %eax\n" /* push arg1 on stack to support plain stack modes too */ + "call _start_c\n" /* transfer to c runtime */ + "hlt\n" /* ensure it does not return */ + ); + __nolibc_entrypoint_epilogue(); +} +#endif /* NOLIBC_NO_RUNTIME */ + +#else /* !defined(__x86_64__) */ + +/* Syscalls for x86_64 : + * - registers are 64-bit + * - syscall number is passed in rax + * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively + * - the system call is performed by calling the syscall instruction + * - syscall return comes in rax + * - rcx and r11 are clobbered, others are preserved. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1 + * Calling Conventions. + * + * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/home + * + */ + +#define __nolibc_syscall0(num) \ +({ \ + long long _ret; \ + register long long _num __asm__ ("rax") = (num); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall1(num, arg1) \ +({ \ + long long _ret; \ + register long long _num __asm__ ("rax") = (num); \ + register long long _arg1 __asm__ ("rdi") = __nolibc_arg_to_reg(arg1); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall2(num, arg1, arg2) \ +({ \ + long long _ret; \ + register long long _num __asm__ ("rax") = (num); \ + register long long _arg1 __asm__ ("rdi") = __nolibc_arg_to_reg(arg1); \ + register long long _arg2 __asm__ ("rsi") = __nolibc_arg_to_reg(arg2); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall3(num, arg1, arg2, arg3) \ +({ \ + long long _ret; \ + register long long _num __asm__ ("rax") = (num); \ + register long long _arg1 __asm__ ("rdi") = __nolibc_arg_to_reg(arg1); \ + register long long _arg2 __asm__ ("rsi") = __nolibc_arg_to_reg(arg2); \ + register long long _arg3 __asm__ ("rdx") = __nolibc_arg_to_reg(arg3); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long long _ret; \ + register long long _num __asm__ ("rax") = (num); \ + register long long _arg1 __asm__ ("rdi") = __nolibc_arg_to_reg(arg1); \ + register long long _arg2 __asm__ ("rsi") = __nolibc_arg_to_reg(arg2); \ + register long long _arg3 __asm__ ("rdx") = __nolibc_arg_to_reg(arg3); \ + register long long _arg4 __asm__ ("r10") = __nolibc_arg_to_reg(arg4); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long long _ret; \ + register long long _num __asm__ ("rax") = (num); \ + register long long _arg1 __asm__ ("rdi") = __nolibc_arg_to_reg(arg1); \ + register long long _arg2 __asm__ ("rsi") = __nolibc_arg_to_reg(arg2); \ + register long long _arg3 __asm__ ("rdx") = __nolibc_arg_to_reg(arg3); \ + register long long _arg4 __asm__ ("r10") = __nolibc_arg_to_reg(arg4); \ + register long long _arg5 __asm__ ("r8") = __nolibc_arg_to_reg(arg5); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define __nolibc_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long long _ret; \ + register long long _num __asm__ ("rax") = (num); \ + register long long _arg1 __asm__ ("rdi") = __nolibc_arg_to_reg(arg1); \ + register long long _arg2 __asm__ ("rsi") = __nolibc_arg_to_reg(arg2); \ + register long long _arg3 __asm__ ("rdx") = __nolibc_arg_to_reg(arg3); \ + register long long _arg4 __asm__ ("r10") = __nolibc_arg_to_reg(arg4); \ + register long long _arg5 __asm__ ("r8") = __nolibc_arg_to_reg(arg5); \ + register long long _arg6 __asm__ ("r9") = __nolibc_arg_to_reg(arg6); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#ifndef NOLIBC_NO_RUNTIME +/* startup code */ +/* + * x86-64 System V ABI mandates: + * 1) %rsp must be 16-byte aligned right before the function call. + * 2) The deepest stack frame should be zero (the %rbp). + * + */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __nolibc_no_stack_protector _start(void) +{ + __asm__ volatile ( + "xor %ebp, %ebp\n" /* zero the stack frame */ + "mov %rsp, %rdi\n" /* save stack pointer to %rdi, as arg1 of _start_c */ + "call _start_c\n" /* transfer to c runtime */ + "hlt\n" /* ensure it does not return */ + ); + __nolibc_entrypoint_epilogue(); +} +#endif /* NOLIBC_NO_RUNTIME */ + +#define NOLIBC_ARCH_HAS_MEMMOVE +void *memmove(void *dst, const void *src, size_t len); + +#define NOLIBC_ARCH_HAS_MEMCPY +void *memcpy(void *dst, const void *src, size_t len); + +#define NOLIBC_ARCH_HAS_MEMSET +void *memset(void *dst, int c, size_t len); + +__asm__ ( +".pushsection .text.nolibc_memmove_memcpy\n" +".weak memmove\n" +".weak memcpy\n" +"memmove:\n" +"memcpy:\n" + "movq %rdx, %rcx\n\t" + "movq %rdi, %rax\n\t" + "movq %rdi, %rdx\n\t" + "subq %rsi, %rdx\n\t" + "cmpq %rcx, %rdx\n\t" + "jb 1f\n\t" + "rep movsb\n\t" + "retq\n" +"1:" /* backward copy */ + "leaq -1(%rdi, %rcx, 1), %rdi\n\t" + "leaq -1(%rsi, %rcx, 1), %rsi\n\t" + "std\n\t" + "rep movsb\n\t" + "cld\n\t" + "retq\n" +".popsection\n" + +".pushsection .text.nolibc_memset\n" +".weak memset\n" +"memset:\n" + "xchgl %eax, %esi\n\t" + "movq %rdx, %rcx\n\t" + "pushq %rdi\n\t" + "rep stosb\n\t" + "popq %rax\n\t" + "retq\n" +".popsection\n" +); + +#endif /* !defined(__x86_64__) */ +#endif /* _NOLIBC_ARCH_X86_H */ diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h deleted file mode 100644 index 1e40620a2b33..000000000000 --- a/tools/include/nolibc/arch-x86_64.h +++ /dev/null @@ -1,218 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * x86_64 specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> - */ - -#ifndef _NOLIBC_ARCH_X86_64_H -#define _NOLIBC_ARCH_X86_64_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for x86_64 : - * - registers are 64-bit - * - syscall number is passed in rax - * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively - * - the system call is performed by calling the syscall instruction - * - syscall return comes in rax - * - rcx and r11 are clobbered, others are preserved. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1 - * Calling Conventions. - * - * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/home - * - */ - -#define my_syscall0(num) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - register long _arg1 __asm__ ("rdi") = (long)(arg1); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - register long _arg1 __asm__ ("rdi") = (long)(arg1); \ - register long _arg2 __asm__ ("rsi") = (long)(arg2); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - register long _arg1 __asm__ ("rdi") = (long)(arg1); \ - register long _arg2 __asm__ ("rsi") = (long)(arg2); \ - register long _arg3 __asm__ ("rdx") = (long)(arg3); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - register long _arg1 __asm__ ("rdi") = (long)(arg1); \ - register long _arg2 __asm__ ("rsi") = (long)(arg2); \ - register long _arg3 __asm__ ("rdx") = (long)(arg3); \ - register long _arg4 __asm__ ("r10") = (long)(arg4); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - register long _arg1 __asm__ ("rdi") = (long)(arg1); \ - register long _arg2 __asm__ ("rsi") = (long)(arg2); \ - register long _arg3 __asm__ ("rdx") = (long)(arg3); \ - register long _arg4 __asm__ ("r10") = (long)(arg4); \ - register long _arg5 __asm__ ("r8") = (long)(arg5); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - register long _arg1 __asm__ ("rdi") = (long)(arg1); \ - register long _arg2 __asm__ ("rsi") = (long)(arg2); \ - register long _arg3 __asm__ ("rdx") = (long)(arg3); \ - register long _arg4 __asm__ ("r10") = (long)(arg4); \ - register long _arg5 __asm__ ("r8") = (long)(arg5); \ - register long _arg6 __asm__ ("r9") = (long)(arg6); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_arg6), "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -/* startup code */ -/* - * x86-64 System V ABI mandates: - * 1) %rsp must be 16-byte aligned right before the function call. - * 2) The deepest stack frame should be zero (the %rbp). - * - */ -void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) -{ - __asm__ volatile ( - "xor %ebp, %ebp\n" /* zero the stack frame */ - "mov %rsp, %rdi\n" /* save stack pointer to %rdi, as arg1 of _start_c */ - "and $-16, %rsp\n" /* %rsp must be 16-byte aligned before call */ - "call _start_c\n" /* transfer to c runtime */ - "hlt\n" /* ensure it does not return */ - ); - __nolibc_entrypoint_epilogue(); -} - -#define NOLIBC_ARCH_HAS_MEMMOVE -void *memmove(void *dst, const void *src, size_t len); - -#define NOLIBC_ARCH_HAS_MEMCPY -void *memcpy(void *dst, const void *src, size_t len); - -#define NOLIBC_ARCH_HAS_MEMSET -void *memset(void *dst, int c, size_t len); - -__asm__ ( -".section .text.nolibc_memmove_memcpy\n" -".weak memmove\n" -".weak memcpy\n" -"memmove:\n" -"memcpy:\n" - "movq %rdx, %rcx\n\t" - "movq %rdi, %rax\n\t" - "movq %rdi, %rdx\n\t" - "subq %rsi, %rdx\n\t" - "cmpq %rcx, %rdx\n\t" - "jb 1f\n\t" - "rep movsb\n\t" - "retq\n" -"1:" /* backward copy */ - "leaq -1(%rdi, %rcx, 1), %rdi\n\t" - "leaq -1(%rsi, %rcx, 1), %rsi\n\t" - "std\n\t" - "rep movsb\n\t" - "cld\n\t" - "retq\n" - -".section .text.nolibc_memset\n" -".weak memset\n" -"memset:\n" - "xchgl %eax, %esi\n\t" - "movq %rdx, %rcx\n\t" - "pushq %rdi\n\t" - "rep stosb\n\t" - "popq %rax\n\t" - "retq\n" -); - -#endif /* _NOLIBC_ARCH_X86_64_H */ diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h index c8f4e5d3add9..b69d9c5ec5c6 100644 --- a/tools/include/nolibc/arch.h +++ b/tools/include/nolibc/arch.h @@ -3,26 +3,15 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ -/* Below comes the architecture-specific code. For each architecture, we have - * the syscall declarations and the _start code definition. This is the only - * global part. On all architectures the kernel puts everything in the stack - * before jumping to _start just above us, without any return address (_start - * is not a function but an entry point). So at the stack pointer we find argc. - * Then argv[] begins, and ends at the first NULL. Then we have envp which - * starts and ends with a NULL as well. So envp=argv+argc+1. - */ - #ifndef _NOLIBC_ARCH_H #define _NOLIBC_ARCH_H -#if defined(__x86_64__) -#include "arch-x86_64.h" -#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) -#include "arch-i386.h" +#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) +#include "arch-x86.h" #elif defined(__ARM_EABI__) #include "arch-arm.h" #elif defined(__aarch64__) -#include "arch-aarch64.h" +#include "arch-arm64.h" #elif defined(__mips__) #include "arch-mips.h" #elif defined(__powerpc__) @@ -33,6 +22,16 @@ #include "arch-s390.h" #elif defined(__loongarch__) #include "arch-loongarch.h" +#elif defined(__sparc__) +#include "arch-sparc.h" +#elif defined(__m68k__) +#include "arch-m68k.h" +#elif defined(__sh__) +#include "arch-sh.h" +#elif defined(__or1k__) +#include "arch-openrisc.h" +#elif defined(__hppa__) +#include "arch-parisc.h" #else #error Unsupported Architecture #endif diff --git a/tools/include/nolibc/assert.h b/tools/include/nolibc/assert.h new file mode 100644 index 000000000000..84ff8ad9ab07 --- /dev/null +++ b/tools/include/nolibc/assert.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Assert for NOLIBC + * Copyright (C) 2026 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_ASSERT_H +#define _NOLIBC_ASSERT_H + +#include "errno.h" +#include "stdio.h" +#include "stdlib.h" + +#endif /* _NOLIBC_ASSERT_H */ + +/* NDEBUG needs to be evaluated on *each* inclusion */ +#ifdef assert +#undef assert +#endif + +#ifndef NDEBUG +#define assert(expr) \ +({ \ + if (!(expr)) { \ + fprintf(stderr, "%s: %s:%d: %s: Assertion `%s' failed.\n", \ + program_invocation_short_name, __FILE__, __LINE__, __func__, \ + #expr); \ + abort(); \ + } \ +}) +#else +#define assert(expr) ((void)0) +#endif diff --git a/tools/include/nolibc/byteswap.h b/tools/include/nolibc/byteswap.h new file mode 100644 index 000000000000..45bbf9609d7a --- /dev/null +++ b/tools/include/nolibc/byteswap.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Byte swapping for NOLIBC + * Copyright (C) 2026 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_BYTESWAP_H +#define _NOLIBC_BYTESWAP_H + +#include "stdint.h" + +#include <linux/swab.h> + +#define bswap_16(_x) __swab16(_x) +#define bswap_32(_x) __swab32(_x) +#define bswap_64(_x) __swab64(_x) + +#endif /* _NOLIBC_BYTESWAP_H */ diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index fa1f547e7f13..f2d7a81d0d7c 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -12,6 +12,15 @@ # define __nolibc_has_attribute(attr) 0 #endif +#if defined(__has_feature) +# define __nolibc_has_feature(feature) __has_feature(feature) +#else +# define __nolibc_has_feature(feature) 0 +#endif + +#define __nolibc_aligned(alignment) __attribute__((aligned(alignment))) +#define __nolibc_aligned_as(type) __nolibc_aligned(__alignof__(type)) + #if __nolibc_has_attribute(naked) # define __nolibc_entrypoint __attribute__((naked)) # define __nolibc_entrypoint_epilogue() @@ -27,15 +36,58 @@ #endif /* defined(__SSP__) ... */ #if __nolibc_has_attribute(no_stack_protector) -# define __no_stack_protector __attribute__((no_stack_protector)) +# define __nolibc_no_stack_protector __attribute__((no_stack_protector)) #else -# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) +# define __nolibc_no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) #endif /* __nolibc_has_attribute(no_stack_protector) */ -#if __nolibc_has_attribute(fallthrough) -# define __nolibc_fallthrough do { } while (0); __attribute__((fallthrough)) +#if __nolibc_has_attribute(__fallthrough__) +# define __nolibc_fallthrough do { } while (0); __attribute__((__fallthrough__)) #else # define __nolibc_fallthrough do { } while (0) #endif /* __nolibc_has_attribute(fallthrough) */ +#if defined(__STDC_VERSION__) +# define __nolibc_stdc_version __STDC_VERSION__ +#else +# define __nolibc_stdc_version 0 +#endif + +#define __nolibc_version(_major, _minor, _patch) ((_major) * 10000 + (_minor) * 100 + (_patch)) + +#ifdef __GNUC__ +# define __nolibc_gnuc_version \ + __nolibc_version(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#else +# define __nolibc_gnuc_version 0 +#endif /* __GNUC__ */ + +#ifdef __clang__ +# define __nolibc_clang_version \ + __nolibc_version(__clang_major__, __clang_minor__, __clang_patchlevel__) +#else +# define __nolibc_clang_version 0 +#endif /* __clang__ */ + +#if __nolibc_stdc_version >= 201112L || \ + __nolibc_gnuc_version >= __nolibc_version(4, 6, 0) || \ + __nolibc_clang_version >= __nolibc_version(3, 0, 0) +# define __nolibc_static_assert(_t) _Static_assert(_t, "") +#else +# define __nolibc_static_assert(_t) +#endif + +/* Make the optimizer believe the variable can be manipulated arbitrarily. */ +#define _NOLIBC_OPTIMIZER_HIDE_VAR(var) __asm__ ("" : "+r" (var)) + +#if __nolibc_has_feature(undefined_behavior_sanitizer) +# if defined(__clang__) +# define __nolibc_no_sanitize_undefined __attribute__((no_sanitize("function"))) +# else +# define __nolibc_no_sanitize_undefined __attribute__((no_sanitize_undefined)) +# endif +#else +# define __nolibc_no_sanitize_undefined +#endif + #endif /* _NOLIBC_COMPILER_H */ diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h index bbcd5fd09806..714256228914 100644 --- a/tools/include/nolibc/crt.h +++ b/tools/include/nolibc/crt.h @@ -7,11 +7,21 @@ #ifndef _NOLIBC_CRT_H #define _NOLIBC_CRT_H +#define __nolibc_arg_to_reg(_a) \ + __builtin_choose_expr(__builtin_classify_type(_a) == __builtin_classify_type(NULL), \ + (unsigned long)(_a), (_a)) + +#ifndef NOLIBC_NO_RUNTIME + +#include "compiler.h" + char **environ __attribute__((weak)); const unsigned long *_auxv __attribute__((weak)); +void _start(void); static void __stack_chk_init(void); static void exit(int); +static char *strrchr(const char *s, int c); extern void (*const __preinit_array_start[])(int, char **, char**) __attribute__((weak)); extern void (*const __preinit_array_end[])(int, char **, char**) __attribute__((weak)); @@ -22,7 +32,26 @@ extern void (*const __init_array_end[])(int, char **, char**) __attribute__((wea extern void (*const __fini_array_start[])(void) __attribute__((weak)); extern void (*const __fini_array_end[])(void) __attribute__((weak)); -__attribute__((weak,used)) +#ifndef NOLIBC_IGNORE_ERRNO +extern char *program_invocation_name __attribute__((weak)); +extern char *program_invocation_short_name __attribute__((weak)); + +static __inline__ +char *__nolibc_program_invocation_short_name(char *long_name) +{ + + char *short_name; + + short_name = strrchr(long_name, '/'); + if (!short_name || !short_name[0]) + return long_name; + + return short_name + 1; +} +#endif /* NOLIBC_IGNORE_ERRNO */ + +void _start_c(long *sp); +__attribute__((weak,used)) __nolibc_no_sanitize_undefined __nolibc_no_stack_protector void _start_c(long *sp) { long argc; @@ -64,9 +93,16 @@ void _start_c(long *sp) /* find _auxv */ for (auxv = (void *)envp; *auxv++;) - ; + __asm__(""); _auxv = auxv; +#ifndef NOLIBC_IGNORE_ERRNO + if (argc > 0 && argv[0]) { + program_invocation_name = argv[0]; + program_invocation_short_name = __nolibc_program_invocation_short_name(argv[0]); + } +#endif /* NOLIBC_IGNORE_ERRNO */ + for (ctor_func = __preinit_array_start; ctor_func < __preinit_array_end; ctor_func++) (*ctor_func)(argc, argv, envp); for (ctor_func = __init_array_start; ctor_func < __init_array_end; ctor_func++) @@ -81,4 +117,5 @@ void _start_c(long *sp) exit(exitcode); } +#endif /* NOLIBC_NO_RUNTIME */ #endif /* _NOLIBC_CRT_H */ diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h index 6f90706d0644..470fdf34394a 100644 --- a/tools/include/nolibc/ctype.h +++ b/tools/include/nolibc/ctype.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_CTYPE_H #define _NOLIBC_CTYPE_H @@ -96,7 +99,4 @@ int ispunct(int c) return isgraph(c) && !isalnum(c); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_CTYPE_H */ diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h new file mode 100644 index 000000000000..4e02ef25e72d --- /dev/null +++ b/tools/include/nolibc/dirent.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Directory access for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_DIRENT_H +#define _NOLIBC_DIRENT_H + +#include "compiler.h" +#include "stdint.h" +#include "types.h" +#include "fcntl.h" + +#include <linux/limits.h> + +struct dirent { + ino_t d_ino; + char d_name[NAME_MAX + 1]; +}; + +/* See comment of FILE in stdio.h */ +typedef struct { + char dummy[1]; +} DIR; + +static __attribute__((unused)) +DIR *fdopendir(int fd) +{ + if (fd < 0) { + SET_ERRNO(EBADF); + return NULL; + } + return (DIR *)(intptr_t)~fd; +} + +static __attribute__((unused)) +DIR *opendir(const char *name) +{ + int fd; + + fd = open(name, O_RDONLY); + if (fd == -1) + return NULL; + return fdopendir(fd); +} + +static __attribute__((unused)) +int closedir(DIR *dirp) +{ + intptr_t i = (intptr_t)dirp; + + if (i >= 0) { + SET_ERRNO(EBADF); + return -1; + } + return close(~i); +} + +static __attribute__((unused)) +int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result) +{ + char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1] __nolibc_aligned_as(struct linux_dirent64); + struct linux_dirent64 *ldir = (void *)buf; + intptr_t i = (intptr_t)dirp; + int fd, ret; + + if (i >= 0) + return EBADF; + + fd = ~i; + + ret = _sys_getdents64(fd, ldir, sizeof(buf)); + if (ret < 0) + return -ret; + if (ret == 0) { + *result = NULL; + return 0; + } + + /* + * getdents64() returns as many entries as fit the buffer. + * readdir() can only return one entry at a time. + * Make sure the non-returned ones are not skipped. + */ + ret = _sys_lseek(fd, ldir->d_off, SEEK_SET); + if (ret < 0) + return -ret; + + entry->d_ino = ldir->d_ino; + /* the destination should always be big enough */ + strlcpy(entry->d_name, ldir->d_name, sizeof(entry->d_name)); + *result = entry; + return 0; +} + +#endif /* _NOLIBC_DIRENT_H */ diff --git a/tools/include/nolibc/elf.h b/tools/include/nolibc/elf.h new file mode 100644 index 000000000000..3e2c5228bf3d --- /dev/null +++ b/tools/include/nolibc/elf.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Shim elf.h header for NOLIBC. + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SYS_ELF_H +#define _NOLIBC_SYS_ELF_H + +#include <linux/elf.h> + +#endif /* _NOLIBC_SYS_ELF_H */ diff --git a/tools/include/nolibc/endian.h b/tools/include/nolibc/endian.h new file mode 100644 index 000000000000..ccc016ecd5ec --- /dev/null +++ b/tools/include/nolibc/endian.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Byte order conversion for NOLIBC + * Copyright (C) 2026 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_ENDIAN_H +#define _NOLIBC_ENDIAN_H + +#include "stdint.h" + +#include <asm/byteorder.h> + +#define htobe16(_x) __cpu_to_be16(_x) +#define htole16(_x) __cpu_to_le16(_x) +#define be16toh(_x) __be16_to_cpu(_x) +#define le16toh(_x) __le16_to_cpu(_x) + +#define htobe32(_x) __cpu_to_be32(_x) +#define htole32(_x) __cpu_to_le32(_x) +#define be32toh(_x) __be32_to_cpu(_x) +#define le32toh(_x) __le32_to_cpu(_x) + +#define htobe64(_x) __cpu_to_be64(_x) +#define htole64(_x) __cpu_to_le64(_x) +#define be64toh(_x) __be64_to_cpu(_x) +#define le64toh(_x) __le64_to_cpu(_x) + +#endif /* _NOLIBC_ENDIAN_H */ diff --git a/tools/include/nolibc/err.h b/tools/include/nolibc/err.h new file mode 100644 index 000000000000..e22ae87a7289 --- /dev/null +++ b/tools/include/nolibc/err.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * formatted error message for NOLIBC + * Copyright (C) 2026 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_ERR_H +#define _NOLIBC_ERR_H + +#include "errno.h" +#include "stdarg.h" +#include "sys.h" + +static __attribute__((unused)) +void vwarn(const char *fmt, va_list args) +{ + fprintf(stderr, "%s: ", program_invocation_short_name); + vfprintf(stderr, fmt, args); + fprintf(stderr, ": %m\n"); +} + +static __attribute__((unused)) +void vwarnx(const char *fmt, va_list args) +{ + fprintf(stderr, "%s: ", program_invocation_short_name); + vfprintf(stderr, fmt, args); + fprintf(stderr, "\n"); +} + +static __attribute__((unused)) +void warn(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vwarn(fmt, args); + va_end(args); +} + +static __attribute__((unused)) +void warnx(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vwarnx(fmt, args); + va_end(args); +} + +static __attribute__((noreturn, unused)) +void verr(int eval, const char *fmt, va_list args) +{ + vwarn(fmt, args); + exit(eval); +} + +static __attribute__((noreturn, unused)) +void verrx(int eval, const char *fmt, va_list args) +{ + warnx(fmt, args); + exit(eval); +} + +static __attribute__((noreturn, unused)) +void err(int eval, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + verr(eval, fmt, args); + va_end(args); +} + +static __attribute__((noreturn, unused)) +void errx(int eval, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + verrx(eval, fmt, args); + va_end(args); +} + +#endif /* _NOLIBC_ERR_H */ diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h index a44486ff0477..a2325596d550 100644 --- a/tools/include/nolibc/errno.h +++ b/tools/include/nolibc/errno.h @@ -4,16 +4,23 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_ERRNO_H #define _NOLIBC_ERRNO_H -#include <asm/errno.h> +#include <linux/errno.h> #ifndef NOLIBC_IGNORE_ERRNO #define SET_ERRNO(v) do { errno = (v); } while (0) int errno __attribute__((weak)); +char *program_invocation_name __attribute__((weak)) = (char *)""; +char *program_invocation_short_name __attribute__((weak)) = (char *)""; #else #define SET_ERRNO(v) do { } while (0) +#define program_invocation_name "" +#define program_invocation_short_name "" #endif @@ -22,7 +29,4 @@ int errno __attribute__((weak)); */ #define MAX_ERRNO 4095 -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_ERRNO_H */ diff --git a/tools/include/nolibc/fcntl.h b/tools/include/nolibc/fcntl.h new file mode 100644 index 000000000000..d4b6af60d4cc --- /dev/null +++ b/tools/include/nolibc/fcntl.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * fcntl definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_FCNTL_H +#define _NOLIBC_FCNTL_H + +#include "arch.h" +#include "types.h" +#include "sys.h" + +#define __nolibc_open_flags(_flags) ((_flags) | O_LARGEFILE) + +#define __nolibc_open_mode(_flags) \ +({ \ + mode_t _mode; \ + va_list args; \ + \ + va_start(args, (_flags)); \ + _mode = va_arg(args, mode_t); \ + va_end(args); \ + \ + _mode; \ +}) + +/* + * int openat(int dirfd, const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int _sys_openat(int dirfd, const char *path, int flags, mode_t mode) +{ + return __nolibc_syscall4(__NR_openat, dirfd, path, flags, mode); +} + +static __attribute__((unused)) +int openat(int dirfd, const char *path, int flags, ...) +{ + return __sysret(_sys_openat(dirfd, path, __nolibc_open_flags(flags), + __nolibc_open_mode(flags))); +} + +/* + * int open(const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int _sys_open(const char *path, int flags, mode_t mode) +{ + return __nolibc_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); +} + +static __attribute__((unused)) +int open(const char *path, int flags, ...) +{ + return __sysret(_sys_open(path, __nolibc_open_flags(flags), __nolibc_open_mode(flags))); +} + +/* + * int creat(const char *path, mode_t mode); + */ + +static __attribute__((unused)) +int creat(const char *path, mode_t mode) +{ + return open(path, O_CREAT | O_WRONLY | O_TRUNC, mode); +} + +#endif /* _NOLIBC_FCNTL_H */ diff --git a/tools/include/nolibc/getopt.h b/tools/include/nolibc/getopt.h new file mode 100644 index 000000000000..3ad140f692df --- /dev/null +++ b/tools/include/nolibc/getopt.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * getopt function definitions for NOLIBC, adapted from musl libc + * Copyright (C) 2005-2020 Rich Felker, et al. + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_GETOPT_H +#define _NOLIBC_GETOPT_H + +struct FILE; +static struct FILE *const stderr; +static int fprintf(struct FILE *stream, const char *fmt, ...); + +__attribute__((weak,unused,section(".data.nolibc_getopt"))) +char *optarg; + +__attribute__((weak,unused,section(".data.nolibc_getopt"))) +int optind = 1, opterr = 1, optopt; + +static __attribute__((unused)) +int getopt(int argc, char * const argv[], const char *optstring) +{ + static int __optpos; + int i; + char c, d; + char *optchar; + + if (!optind) { + __optpos = 0; + optind = 1; + } + + if (optind >= argc || !argv[optind]) + return -1; + + if (argv[optind][0] != '-') { + if (optstring[0] == '-') { + optarg = argv[optind++]; + return 1; + } + return -1; + } + + if (!argv[optind][1]) + return -1; + + if (argv[optind][1] == '-' && !argv[optind][2]) + return optind++, -1; + + if (!__optpos) + __optpos++; + c = argv[optind][__optpos]; + optchar = argv[optind] + __optpos; + __optpos++; + + if (!argv[optind][__optpos]) { + optind++; + __optpos = 0; + } + + if (optstring[0] == '-' || optstring[0] == '+') + optstring++; + + i = 0; + d = 0; + do { + d = optstring[i++]; + } while (d && d != c); + + if (!d || d != c || c == ':') { + optopt = c; + if (optstring[0] != ':' && opterr) + fprintf(stderr, "%s: unrecognized option: %c\n", argv[0], *optchar); + return '?'; + } + if (optstring[i] == ':') { + optarg = NULL; + if (optstring[i + 1] != ':' || __optpos) { + optarg = argv[optind++]; + if (__optpos) + optarg += __optpos; + __optpos = 0; + } + if (optind > argc) { + optopt = c; + if (optstring[0] == ':') + return ':'; + if (opterr) + fprintf(stderr, "%s: option requires argument: %c\n", + argv[0], *optchar); + return '?'; + } + } + return c; +} + +#endif /* _NOLIBC_GETOPT_H */ diff --git a/tools/include/nolibc/inttypes.h b/tools/include/nolibc/inttypes.h new file mode 100644 index 000000000000..1977bd74bfeb --- /dev/null +++ b/tools/include/nolibc/inttypes.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ + +#include "nolibc.h" diff --git a/tools/include/nolibc/limits.h b/tools/include/nolibc/limits.h new file mode 100644 index 000000000000..306d4141f4d2 --- /dev/null +++ b/tools/include/nolibc/limits.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Shim limits.h header for NOLIBC. + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +#include "nolibc.h" diff --git a/tools/include/nolibc/math.h b/tools/include/nolibc/math.h new file mode 100644 index 000000000000..9df823ddd412 --- /dev/null +++ b/tools/include/nolibc/math.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * math definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SYS_MATH_H +#define _NOLIBC_SYS_MATH_H + +static __inline__ +double fabs(double x) +{ + return x >= 0 ? x : -x; +} + +static __inline__ +float fabsf(float x) +{ + return x >= 0 ? x : -x; +} + +static __inline__ +long double fabsl(long double x) +{ + return x >= 0 ? x : -x; +} + +#endif /* _NOLIBC_SYS_MATH_H */ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 92436b1e4441..faa94f247281 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -12,27 +12,26 @@ * * Syscalls are split into 3 levels: * - The lower level is the arch-specific syscall() definition, consisting in - * assembly code in compound expressions. These are called my_syscall0() to - * my_syscall6() depending on the number of arguments. All input arguments + * assembly code in compound expressions. These are called __nolibc_syscall0() to + * __nolibc_syscall6() depending on the number of arguments. All input arguments * are castto a long stored in a register. These expressions always return * the syscall's return value as a signed long value which is often either * a pointer or the negated errno value. * * - The second level is mostly architecture-independent. It is made of - * static functions called sys_<name>() which rely on my_syscallN() + * static functions called _sys_<name>() which rely on __nolibc_syscallN() * depending on the syscall definition. These functions are responsible * for exposing the appropriate types for the syscall arguments (int, * pointers, etc) and for setting the appropriate return type (often int). * A few of them are architecture-specific because the syscalls are not all * mapped exactly the same among architectures. For example, some archs do - * not implement select() and need pselect6() instead, so the sys_select() + * not implement select() and need pselect6() instead, so the _sys_select() * function will have to abstract this. * * - The third level is the libc call definition. It exposes the lower raw - * sys_<name>() calls in a way that looks like what a libc usually does, + * _sys_<name>() calls in a way that looks like what a libc usually does, * takes care of specific input values, and of setting errno upon error. - * There can be minor variations compared to standard libc calls. For - * example the open() call always takes 3 args here. + * There can be minor variations compared to standard libc calls. * * The errno variable is declared static and unused. This way it can be * optimized away if not used. However this means that a program made of @@ -97,14 +96,45 @@ #include "arch.h" #include "types.h" #include "sys.h" +#include "sys/auxv.h" +#include "sys/ioctl.h" +#include "sys/mman.h" +#include "sys/mount.h" +#include "sys/prctl.h" +#include "sys/ptrace.h" +#include "sys/random.h" +#include "sys/reboot.h" +#include "sys/resource.h" +#include "sys/select.h" +#include "sys/stat.h" +#include "sys/syscall.h" +#include "sys/sysmacros.h" +#include "sys/time.h" +#include "sys/timerfd.h" +#include "sys/uio.h" +#include "sys/utsname.h" +#include "sys/wait.h" #include "ctype.h" +#include "elf.h" +#include "sched.h" #include "signal.h" #include "unistd.h" +#include "stdbool.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "time.h" #include "stackprotector.h" +#include "dirent.h" +#include "fcntl.h" +#include "getopt.h" +#include "poll.h" +#include "math.h" +#include "err.h" +#include "byteswap.h" +#include "endian.h" +#include "assert.h" +#include "alloca.h" /* Used by programs to avoid std includes */ #define NOLIBC diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h new file mode 100644 index 000000000000..dbcf883da237 --- /dev/null +++ b/tools/include/nolibc/poll.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * poll definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_POLL_H +#define _NOLIBC_POLL_H + +#include "arch.h" +#include "sys.h" + +#include <linux/poll.h> +#include <linux/time.h> + +/* + * int poll(struct pollfd *fds, int nfds, int timeout); + */ + +static __attribute__((unused)) +int _sys_poll(struct pollfd *fds, int nfds, int timeout) +{ +#if defined(__NR_ppoll_time64) + struct __kernel_timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return __nolibc_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); +#else + struct __kernel_old_timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return __nolibc_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); +#endif +} + +static __attribute__((unused)) +int poll(struct pollfd *fds, int nfds, int timeout) +{ + return __sysret(_sys_poll(fds, nfds, timeout)); +} + +#endif /* _NOLIBC_POLL_H */ diff --git a/tools/include/nolibc/sched.h b/tools/include/nolibc/sched.h new file mode 100644 index 000000000000..7a5f6d9484c8 --- /dev/null +++ b/tools/include/nolibc/sched.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * sched function definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SCHED_H +#define _NOLIBC_SCHED_H + +#include "sys.h" + +#include <linux/sched.h> + +/* + * int setns(int fd, int nstype); + */ + +static __attribute__((unused)) +int _sys_setns(int fd, int nstype) +{ + return __nolibc_syscall2(__NR_setns, fd, nstype); +} + +static __attribute__((unused)) +int setns(int fd, int nstype) +{ + return __sysret(_sys_setns(fd, nstype)); +} + + +/* + * int unshare(int flags); + */ + +static __attribute__((unused)) +int _sys_unshare(int flags) +{ + return __nolibc_syscall1(__NR_unshare, flags); +} + +static __attribute__((unused)) +int unshare(int flags) +{ + return __sysret(_sys_unshare(flags)); +} + +#endif /* _NOLIBC_SCHED_H */ diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h index 137552216e46..99a0489fe3e8 100644 --- a/tools/include/nolibc/signal.h +++ b/tools/include/nolibc/signal.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_SIGNAL_H #define _NOLIBC_SIGNAL_H @@ -13,13 +16,11 @@ #include "sys.h" /* This one is not marked static as it's needed by libgcc for divide by zero */ +int raise(int signal); __attribute__((weak,unused,section(".text.nolibc_raise"))) int raise(int signal) { - return sys_kill(sys_getpid(), signal); + return _sys_kill(_sys_getpid(), signal); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_SIGNAL_H */ diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h index 1d0d5259ec41..916a92062ba0 100644 --- a/tools/include/nolibc/stackprotector.h +++ b/tools/include/nolibc/stackprotector.h @@ -9,6 +9,7 @@ #include "compiler.h" +#ifndef NOLIBC_NO_RUNTIME #if defined(_NOLIBC_STACKPROTECTOR) #include "sys.h" @@ -18,16 +19,18 @@ * triggering stack protector errors themselves */ +void __stack_chk_fail(void); __attribute__((weak,used,noreturn,section(".text.nolibc_stack_chk"))) void __stack_chk_fail(void) { pid_t pid; - my_syscall3(__NR_write, STDERR_FILENO, "!!Stack smashing detected!!\n", 28); - pid = my_syscall0(__NR_getpid); - my_syscall2(__NR_kill, pid, SIGABRT); + __nolibc_syscall3(__NR_write, STDERR_FILENO, "!!Stack smashing detected!!\n", 28); + pid = __nolibc_syscall0(__NR_getpid); + __nolibc_syscall2(__NR_kill, pid, SIGABRT); for (;;); } +void __stack_chk_fail_local(void); __attribute__((weak,noreturn,section(".text.nolibc_stack_chk"))) void __stack_chk_fail_local(void) { @@ -37,9 +40,10 @@ void __stack_chk_fail_local(void) __attribute__((weak,used,section(".data.nolibc_stack_chk"))) uintptr_t __stack_chk_guard; -static __no_stack_protector void __stack_chk_init(void) +static __nolibc_no_stack_protector void __stack_chk_init(void) { - my_syscall3(__NR_getrandom, &__stack_chk_guard, sizeof(__stack_chk_guard), 0); + __nolibc_syscall3(__NR_getrandom, &__stack_chk_guard, sizeof(__stack_chk_guard), + GRND_INSECURE | GRND_NONBLOCK); /* a bit more randomness in case getrandom() fails, ensure the guard is never 0 */ if (__stack_chk_guard != (uintptr_t) &__stack_chk_guard) __stack_chk_guard ^= (uintptr_t) &__stack_chk_guard; @@ -47,5 +51,6 @@ static __no_stack_protector void __stack_chk_init(void) #else /* !defined(_NOLIBC_STACKPROTECTOR) */ static void __stack_chk_init(void) {} #endif /* defined(_NOLIBC_STACKPROTECTOR) */ +#endif /* NOLIBC_NO_RUNTIME */ #endif /* _NOLIBC_STACKPROTECTOR_H */ diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h index 933bc0be7e1c..dc2dc46f8ca8 100644 --- a/tools/include/nolibc/std.h +++ b/tools/include/nolibc/std.h @@ -13,24 +13,22 @@ * syscall-specific stuff, as this file is expected to be included very early. */ -/* note: may already be defined */ -#ifndef NULL -#define NULL ((void *)0) -#endif - #include "stdint.h" +#include "stddef.h" + +#include <linux/types.h> /* those are commonly provided by sys/types.h */ -typedef unsigned int dev_t; -typedef unsigned long ino_t; +typedef uint64_t dev_t; +typedef uint64_t ino_t; typedef unsigned int mode_t; typedef signed int pid_t; typedef unsigned int uid_t; typedef unsigned int gid_t; typedef unsigned long nlink_t; -typedef signed long off_t; +typedef int64_t off_t; typedef signed long blksize_t; typedef signed long blkcnt_t; -typedef signed long time_t; +typedef __kernel_time64_t time_t; #endif /* _NOLIBC_STD_H */ diff --git a/tools/include/nolibc/stddef.h b/tools/include/nolibc/stddef.h new file mode 100644 index 000000000000..a3976341afdd --- /dev/null +++ b/tools/include/nolibc/stddef.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Stddef definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_STDDEF_H +#define _NOLIBC_STDDEF_H + +#include "stdint.h" + +/* note: may already be defined */ +#ifndef NULL +#define NULL ((void *)0) +#endif + +#ifndef offsetof +#define offsetof(TYPE, FIELD) __builtin_offsetof(TYPE, FIELD) +#endif + +#endif /* _NOLIBC_STDDEF_H */ diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h index cd79ddd6170e..b052ad6303c3 100644 --- a/tools/include/nolibc/stdint.h +++ b/tools/include/nolibc/stdint.h @@ -39,8 +39,8 @@ typedef size_t uint_fast32_t; typedef int64_t int_fast64_t; typedef uint64_t uint_fast64_t; -typedef int64_t intmax_t; -typedef uint64_t uintmax_t; +typedef __INTMAX_TYPE__ intmax_t; +typedef __UINTMAX_TYPE__ uintmax_t; /* limits of integral types */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index 3892034198dd..ebdd413d13ec 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -4,12 +4,16 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STDIO_H #define _NOLIBC_STDIO_H #include "std.h" #include "arch.h" #include "errno.h" +#include "fcntl.h" #include "types.h" #include "sys.h" #include "stdarg.h" @@ -17,6 +21,8 @@ #include "string.h" #include "compiler.h" +static const char *strerror(int errnum); + #ifndef EOF #define EOF (-1) #endif @@ -50,6 +56,32 @@ FILE *fdopen(int fd, const char *mode __attribute__((unused))) return (FILE*)(intptr_t)~fd; } +static __attribute__((unused)) +FILE *fopen(const char *pathname, const char *mode) +{ + int flags, fd; + + switch (*mode) { + case 'r': + flags = O_RDONLY; + break; + case 'w': + flags = O_WRONLY | O_CREAT | O_TRUNC; + break; + case 'a': + flags = O_WRONLY | O_CREAT | O_APPEND; + break; + default: + SET_ERRNO(EINVAL); return NULL; + } + + if (mode[1] == '+') + flags = (flags & ~(O_RDONLY | O_WRONLY)) | O_RDWR; + + fd = open(pathname, flags, 0666); + return fdopen(fd, mode); +} + /* provides the fd of stream. */ static __attribute__((unused)) int fileno(FILE *stream) @@ -138,7 +170,7 @@ int putchar(int c) } -/* fwrite(), puts(), fputs(). Note that puts() emits '\n' but not fputs(). */ +/* fwrite(), fread(), puts(), fputs(). Note that puts() emits '\n' but not fputs(). */ /* internal fwrite()-like function which only takes a size and returns 0 on * success or EOF on error. It automatically retries on short writes. @@ -172,6 +204,38 @@ size_t fwrite(const void *s, size_t size, size_t nmemb, FILE *stream) return written; } +/* internal fread()-like function which only takes a size and returns 0 on + * success or EOF on error. It automatically retries on short reads. + */ +static __attribute__((unused)) +int _fread(void *buf, size_t size, FILE *stream) +{ + int fd = fileno(stream); + ssize_t ret; + + while (size) { + ret = read(fd, buf, size); + if (ret <= 0) + return EOF; + size -= ret; + buf += ret; + } + return 0; +} + +static __attribute__((unused)) +size_t fread(void *s, size_t size, size_t nmemb, FILE *stream) +{ + size_t nread; + + for (nread = 0; nread < nmemb; nread++) { + if (_fread(s, size, stream) != 0) + break; + s += size; + } + return nread; +} + static __attribute__((unused)) int fputs(const char *s, FILE *stream) { @@ -208,117 +272,392 @@ char *fgets(char *s, int size, FILE *stream) } -/* minimal vfprintf(). It supports the following formats: - * - %[l*]{d,u,c,x,p} - * - %s - * - unknown modifiers are ignored. +/* fseek */ +static __attribute__((unused)) +int fseek(FILE *stream, long offset, int whence) +{ + int fd = fileno(stream); + off_t ret; + + ret = lseek(fd, offset, whence); + + /* lseek() and fseek() differ in that lseek returns the new + * position or -1, fseek() returns either 0 or -1. + */ + if (ret >= 0) + return 0; + + return -1; +} + + +/* printf(). Supports most of the normal integer and string formats. + * - %[#0-+ ][width|*[.precision|*}][{l,t,z,ll,L,j,q}]{c,d,i,u,o,x,X,p,s,m,%} + * - %% generates a single % + * - %m outputs strerror(errno). + * - %X outputs a..f the same as %x. + * - No support for floating point or wide characters. + * - Invalid formats are copied to the output buffer. + * + * Called by vfprintf() and snprintf() to do the actual formatting. + * The callers provide a callback function to save the formatted data. + * The callback function is called multiple times: + * - for each group of literal characters in the format string. + * - for field padding. + * - for each conversion specifier. + * - with (NULL, 0) at the end of the __nolibc_printf. + * If the callback returns non-zero __nolibc_printf() immediately returns -1. */ -static __attribute__((unused, format(printf, 2, 0))) -int vfprintf(FILE *stream, const char *fmt, va_list args) + +typedef int (*__nolibc_printf_cb)(void *state, const char *buf, size_t size); + +/* This code uses 'flag' variables that are indexed by the low 6 bits + * of characters to optimise checks for multiple characters. + * + * _NOLIBC_PF_FLAGS_CONTAIN(flags, 'a', 'b'. ...) + * returns non-zero if the bit for any of the specified characters is set. + * + * _NOLIBC_PF_CHAR_IS_ONE_OF(ch, 'a', 'b'. ...) + * returns the flag bit for ch if it is one of the specified characters. + * All the characters must be in the same 32 character block (non-alphabetic, + * upper case, or lower case) of the ASCII character set. + */ +#define _NOLIBC_PF_FLAG(ch) (1u << ((ch) & 0x1f)) +#define _NOLIBC_PF_FLAG_NZ(ch) ((ch) ? _NOLIBC_PF_FLAG(ch) : 0) +#define _NOLIBC_PF_FLAG8(cmp_1, cmp_2, cmp_3, cmp_4, cmp_5, cmp_6, cmp_7, cmp_8, ...) \ + (_NOLIBC_PF_FLAG_NZ(cmp_1) | _NOLIBC_PF_FLAG_NZ(cmp_2) | \ + _NOLIBC_PF_FLAG_NZ(cmp_3) | _NOLIBC_PF_FLAG_NZ(cmp_4) | \ + _NOLIBC_PF_FLAG_NZ(cmp_5) | _NOLIBC_PF_FLAG_NZ(cmp_6) | \ + _NOLIBC_PF_FLAG_NZ(cmp_7) | _NOLIBC_PF_FLAG_NZ(cmp_8)) +#define _NOLIBC_PF_FLAGS_CONTAIN(flags, ...) \ + ((flags) & _NOLIBC_PF_FLAG8(__VA_ARGS__, 0, 0, 0, 0, 0, 0, 0)) +#define _NOLIBC_PF_CHAR_IS_ONE_OF(ch, cmp_1, ...) \ + ((unsigned int)(ch) - (cmp_1 & 0xe0) > 0x1f ? 0 : \ + _NOLIBC_PF_FLAGS_CONTAIN(_NOLIBC_PF_FLAG(ch), cmp_1, __VA_ARGS__)) + +static __attribute__((unused, format(printf, 3, 0))) +int __nolibc_printf(__nolibc_printf_cb cb, void *state, const char *fmt, va_list args) { - char escape, lpref, c; + char ch; unsigned long long v; - unsigned int written; - size_t len, ofs; - char tmpbuf[21]; + long long signed_v; + int written, width, precision, len; + unsigned int flags, ch_flag; + char outbuf[2 + 31 + 22 + 1]; + char *out; const char *outstr; + unsigned int sign_prefix; + int got_width; - written = ofs = escape = lpref = 0; + written = 0; while (1) { - c = fmt[ofs++]; - - if (escape) { - /* we're in an escape sequence, ofs == 1 */ - escape = 0; - if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') { - char *out = tmpbuf; - - if (c == 'p') - v = va_arg(args, unsigned long); - else if (lpref) { - if (lpref > 1) - v = va_arg(args, unsigned long long); - else - v = va_arg(args, unsigned long); - } else - v = va_arg(args, unsigned int); - - if (c == 'd') { - /* sign-extend the value */ - if (lpref == 0) - v = (long long)(int)v; - else if (lpref == 1) - v = (long long)(long)v; + outstr = fmt; + ch = *fmt++; + if (!ch) + break; + + width = 0; + flags = 0; + if (ch != '%') { + while (*fmt && *fmt != '%') + fmt++; + /* Output characters from the format string. */ + len = fmt - outstr; + goto do_output; + } + + /* we're in a format sequence */ + + /* Conversion flag characters */ + while (1) { + ch = *fmt++; + ch_flag = _NOLIBC_PF_CHAR_IS_ONE_OF(ch, ' ', '#', '+', '-', '0'); + if (!ch_flag) + break; + flags |= ch_flag; + } + + /* Width and precision */ + for (got_width = 0;; ch = *fmt++) { + if (ch == '*') { + precision = va_arg(args, int); + ch = *fmt++; + } else { + for (precision = 0; ch >= '0' && ch <= '9'; ch = *fmt++) + precision = precision * 10 + (ch - '0'); + } + if (got_width) + break; + width = precision; + if (ch != '.') { + /* Default precision for strings */ + precision = -1; + break; + } + got_width = 1; + } + /* A negative width (e.g. from "%*s") requests left justify. */ + if (width < 0) { + width = -width; + flags |= _NOLIBC_PF_FLAG('-'); + } + + /* Length modifier. + * They miss the conversion flags characters " #+-0" so can go into flags. + * Change both L and ll to j (all always 64bit). + */ + if (ch == 'L') + ch = 'j'; + ch_flag = _NOLIBC_PF_CHAR_IS_ONE_OF(ch, 'l', 't', 'z', 'j', 'q'); + if (ch_flag != 0) { + if (ch == 'l' && fmt[0] == 'l') { + fmt++; + ch_flag = _NOLIBC_PF_FLAG('j'); + } + flags |= ch_flag; + ch = *fmt++; + } + + /* Conversion specifiers. */ + + /* Numeric and pointer conversion specifiers. + * + * Use an explicit bound check (rather than _NOLIBC_PF_CHAR_IS_ONE_OF()) + * so that 'X' can be allowed through. + * 'X' gets treated and 'x' because _NOLIBC_PF_FLAG() returns the same + * value for both. + * + * We need to check for "%p" or "%#x" later, merging here gives better code. + * But '#' collides with 'c' so shift right. + */ + ch_flag = _NOLIBC_PF_FLAG(ch) | (flags & _NOLIBC_PF_FLAG('#')) >> 1; + if (((ch >= 'a' && ch <= 'z') || ch == 'X') && + _NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'c', 'd', 'i', 'u', 'o', 'x', 'p', 's')) { + /* 'long' is needed for pointer/string conversions and ltz lengths. + * A single test can be used provided 'p' (the same bit as '0') + * is masked from flags. + */ + if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag | (flags & ~_NOLIBC_PF_FLAG('p')), + 'p', 's', 'l', 't', 'z')) { + v = va_arg(args, unsigned long); + signed_v = (long)v; + } else if (_NOLIBC_PF_FLAGS_CONTAIN(flags, 'j', 'q')) { + v = va_arg(args, unsigned long long); + signed_v = v; + } else { + v = va_arg(args, unsigned int); + signed_v = (int)v; + } + + if (ch == 'c') { + /* "%c" - single character. */ + outbuf[0] = v; + len = 1; + outstr = outbuf; + goto do_output; + } + + if (ch == 's') { + /* "%s" - character string. */ + outstr = (const char *)(uintptr_t)v; + if (!outstr) { + outstr = "(null)"; + /* Match glibc, nothing output if precision too small */ + len = precision < 0 || precision >= 6 ? 6 : 0; + goto do_output; + } + goto do_strlen_output; + } + + /* The 'sign_prefix' can be zero, one or two ("0x") characters. + * Prepended least significant byte first stopping on a zero byte. + */ + sign_prefix = 0; + + if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'd', 'i')) { + /* "%d" and "%i" - signed decimal numbers. */ + if (signed_v < 0) { + sign_prefix = '-'; + v = -(signed_v + 1); + v++; + } else if (_NOLIBC_PF_FLAGS_CONTAIN(flags, '+')) { + sign_prefix = '+'; + } else if (_NOLIBC_PF_FLAGS_CONTAIN(flags, ' ')) { + sign_prefix = ' '; } + } else { + /* "#o" requires that the output always starts with a '0'. + * This needs another check after any zero padding to avoid + * adding an extra leading '0'. + */ + if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'o') && + _NOLIBC_PF_FLAGS_CONTAIN(ch_flag, '#' - 1)) + sign_prefix = '0'; + } - switch (c) { - case 'c': - out[0] = v; - out[1] = 0; - break; - case 'd': - i64toa_r(v, out); - break; - case 'u': - u64toa_r(v, out); - break; - case 'p': - *(out++) = '0'; - *(out++) = 'x'; - __nolibc_fallthrough; - default: /* 'x' and 'p' above */ - u64toh_r(v, out); - break; + /* The value is converted offset into the buffer so that + * 31 zero pad characters and the sign/prefix can be added in front. + * The longest digit string is 22 + 1 for octal conversions. + */ + out = outbuf + 2 + 31; + + if (v == 0) { + /* There are special rules for zero. */ + if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'p')) { + /* "%p" match glibc, precision is ignored */ + outstr = "(nil)"; + len = 5; + goto do_output; + } + if (!precision) { + /* Explicit %nn.0d, no digits output (except for %#.0o) */ + len = 0; + goto prepend_sign; } - outstr = tmpbuf; + /* All other formats (including "%#x") just output "0". */ + out[0] = '0'; + len = 1; + } else { + /* Convert the number to ascii in the required base. */ + unsigned long long recip; + unsigned int base; + if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'd', 'i', 'u')) { + base = 10; + recip = _NOLIBC_U64TOA_RECIP(10); + } else if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'o')) { + base = 8; + recip = _NOLIBC_U64TOA_RECIP(8); + } else { + base = 16; + recip = _NOLIBC_U64TOA_RECIP(16); + if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'p', '#' - 1)) { + /* "%p" and "%#x" need "0x" prepending. */ + sign_prefix = '0' << 8 | 'x'; + } + } + len = _nolibc_u64toa_base(v, out, base, recip); } - else if (c == 's') { - outstr = va_arg(args, char *); - if (!outstr) - outstr="(null)"; + + /* Add zero padding */ + if (precision < 0) { + /* No explicit precision (or negative from "%.*s"). */ + if (!_NOLIBC_PF_FLAGS_CONTAIN(flags, '0')) + goto no_zero_padding; + if (_NOLIBC_PF_FLAGS_CONTAIN(flags, '-')) + /* Left justify overrides zero pad */ + goto no_zero_padding; + /* eg "%05d", Zero pad to field width less sign. + * Note that precision can end up negative so all + * the variables have to be 'signed int'. + */ + precision = width; + if (sign_prefix) { + precision--; + if (sign_prefix >= 256) + precision--; + } } - else if (c == '%') { - /* queue it verbatim */ - continue; + if (precision > 31) + /* Don't run off the start of outbuf[], arbitrary limit + * longer than the longest number field. */ + precision = 31; + for (; len < precision; len++) { + /* Stop gcc generating horrid code and memset(). */ + _NOLIBC_OPTIMIZER_HIDE_VAR(len); + *--out = '0'; } - else { - /* modifiers or final 0 */ - if (c == 'l') { - /* long format prefix, maintain the escape */ - lpref++; +no_zero_padding: + + /* %#o has set sign_prefix to '0', but we don't want so add an extra + * leading zero here. + * Since the only other byte values of sign_prefix are ' ', '+' and '-' + * it is enough to check that out[] doesn't already start with sign_prefix. + */ + if (sign_prefix - *out) { +prepend_sign: + /* Add the 0, 1 or 2 ("0x") sign/prefix characters at the front. */ + for (; sign_prefix; sign_prefix >>= 8) { + /* Force gcc to increment len inside the loop. */ + _NOLIBC_OPTIMIZER_HIDE_VAR(len); + len++; + *--out = sign_prefix; } - escape = 1; - goto do_escape; } - len = strlen(outstr); - goto flush_str; + outstr = out; + goto do_output; } - /* not an escape sequence */ - if (c == 0 || c == '%') { - /* flush pending data on escape or end */ - escape = 1; - lpref = 0; - outstr = fmt; - len = ofs - 1; - flush_str: - if (_fwrite(outstr, len, stream) != 0) - break; + if (ch == 'm') { +#ifdef NOLIBC_IGNORE_ERRNO + outstr = "unknown error"; +#else + outstr = strerror(errno); +#endif /* NOLIBC_IGNORE_ERRNO */ + goto do_strlen_output; + } - written += len; - do_escape: - if (c == 0) - break; - fmt += ofs; - ofs = 0; - continue; + if (ch != '%') { + /* Invalid format: back up to output the format characters */ + fmt = outstr + 1; + /* and output a '%' now. */ } + /* %% is documented as a 'conversion specifier'. + * Any flags, precision or length modifier are ignored. + */ + len = 1; + width = 0; + outstr = fmt - 1; + goto do_output; + +do_strlen_output: + /* Open coded strnlen() (slightly smaller). */ + for (len = 0; precision < 0 || len < precision; len++) + if (!outstr[len]) + break; - /* literal char, just queue it */ +do_output: + written += len; + + /* Stop gcc back-merging this code into one of the conditionals above. */ + _NOLIBC_OPTIMIZER_HIDE_VAR(len); + + /* Output the characters on the required side of any padding. */ + width -= len; + flags = _NOLIBC_PF_FLAGS_CONTAIN(flags, '-'); + if (flags && cb(state, outstr, len) != 0) + return -1; + while (width > 0) { + /* Output pad in 16 byte blocks with the small block first. */ + int pad_len = ((width - 1) & 15) + 1; + width -= pad_len; + written += pad_len; + if (cb(state, " ", pad_len) != 0) + return -1; + } + if (!flags && cb(state, outstr, len) != 0) + return -1; } + + /* Request a final '\0' be added to the snprintf() output. + * This may be the only call of the cb() function. + */ + if (cb(state, NULL, 0) != 0) + return -1; + return written; } +static int __nolibc_fprintf_cb(void *stream, const char *buf, size_t size) +{ + return _fwrite(buf, size, stream); +} + +static __attribute__((unused, format(printf, 2, 0))) +int vfprintf(FILE *stream, const char *fmt, va_list args) +{ + return __nolibc_printf(__nolibc_fprintf_cb, stream, fmt, args); +} + static __attribute__((unused, format(printf, 1, 0))) int vprintf(const char *fmt, va_list args) { @@ -349,10 +688,269 @@ int printf(const char *fmt, ...) return ret; } +static __attribute__((unused, format(printf, 2, 0))) +int vdprintf(int fd, const char *fmt, va_list args) +{ + FILE *stream; + + stream = fdopen(fd, NULL); + if (!stream) + return -1; + /* Technically 'stream' is leaked, but as it's only a wrapper around 'fd' that is fine */ + return vfprintf(stream, fmt, args); +} + +static __attribute__((unused, format(printf, 2, 3))) +int dprintf(int fd, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vdprintf(fd, fmt, args); + va_end(args); + + return ret; +} + +struct __nolibc_sprintf_cb_state { + char *buf; + size_t space; +}; + +static int __nolibc_sprintf_cb(void *v_state, const char *buf, size_t size) +{ + struct __nolibc_sprintf_cb_state *state = v_state; + size_t space = state->space; + char *tgt; + + /* Truncate the request to fit in the output buffer space. + * The last byte is reserved for the terminating '\0'. + * state->space can only be zero for snprintf(NULL, 0, fmt, args) + * so this normally lets through calls with 'size == 0'. + */ + if (size >= space) { + if (space <= 1) + return 0; + size = space - 1; + } + tgt = state->buf; + + /* __nolibc_printf() ends with cb(state, NULL, 0) to request the output + * buffer be '\0' terminated. + * That will be the only cb() call for, eg, snprintf(buf, sz, ""). + * Zero lengths can occur at other times (eg "%s" for an empty string). + * Unconditionally write the '\0' byte to reduce code size, it is + * normally overwritten by the data being output. + * There is no point adding a '\0' after copied data - there is always + * another call. + */ + *tgt = '\0'; + if (size) { + state->space = space - size; + state->buf = tgt + size; + memcpy(tgt, buf, size); + } + + return 0; +} + +static __attribute__((unused, format(printf, 3, 0))) +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + struct __nolibc_sprintf_cb_state state = { .buf = buf, .space = size }; + + return __nolibc_printf(__nolibc_sprintf_cb, &state, fmt, args); +} + +static __attribute__((unused, format(printf, 3, 4))) +int snprintf(char *buf, size_t size, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vsnprintf(buf, size, fmt, args); + va_end(args); + + return ret; +} + +static __attribute__((unused, format(printf, 2, 0))) +int vsprintf(char *buf, const char *fmt, va_list args) +{ + return vsnprintf(buf, SIZE_MAX, fmt, args); +} + +static __attribute__((unused, format(printf, 2, 3))) +int sprintf(char *buf, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vsprintf(buf, fmt, args); + va_end(args); + + return ret; +} + +static __attribute__((unused, format(printf, 2, 0))) +int __nolibc_vasprintf(char **strp, const char *fmt, va_list args1, va_list args2) +{ + int len1, len2; + char *buf; + + len1 = vsnprintf(NULL, 0, fmt, args1); + if (len1 < 0) + return -1; + + buf = malloc(len1 + 1); + if (!buf) + return -1; + + len2 = vsnprintf(buf, len1 + 1, fmt, args2); + if (len2 < 0) { + free(buf); + return -1; + } + + *strp = buf; + return len1; +} + +static __attribute__((unused, format(printf, 2, 0))) +int vasprintf(char **strp, const char *fmt, va_list args) +{ + va_list args2; + int ret; + + va_copy(args2, args); + ret = __nolibc_vasprintf(strp, fmt, args, args2); + va_end(args2); + + return ret; +} + +static __attribute__((unused, format(printf, 2, 3))) +int asprintf(char **strp, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vasprintf(strp, fmt, args); + va_end(args); + + return ret; +} + +static __attribute__((unused)) +int vsscanf(const char *str, const char *format, va_list args) +{ + uintmax_t uval; + intmax_t ival; + int base; + char *endptr; + int matches; + int lpref; + + matches = 0; + + while (1) { + if (*format == '%') { + /* start of pattern */ + lpref = 0; + format++; + + if (*format == 'l') { + /* same as in printf() */ + lpref = 1; + format++; + if (*format == 'l') { + lpref = 2; + format++; + } + } + + if (*format == '%') { + /* literal % */ + if ('%' != *str) + goto done; + str++; + format++; + continue; + } else if (*format == 'd') { + ival = strtoll(str, &endptr, 10); + if (lpref == 0) + *va_arg(args, int *) = ival; + else if (lpref == 1) + *va_arg(args, long *) = ival; + else if (lpref == 2) + *va_arg(args, long long *) = ival; + } else if (*format == 'u' || *format == 'x' || *format == 'X') { + base = *format == 'u' ? 10 : 16; + uval = strtoull(str, &endptr, base); + if (lpref == 0) + *va_arg(args, unsigned int *) = uval; + else if (lpref == 1) + *va_arg(args, unsigned long *) = uval; + else if (lpref == 2) + *va_arg(args, unsigned long long *) = uval; + } else if (*format == 'p') { + *va_arg(args, void **) = (void *)strtoul(str, &endptr, 16); + } else { + SET_ERRNO(EILSEQ); + goto done; + } + + format++; + str = endptr; + matches++; + + } else if (*format == '\0') { + goto done; + } else if (isspace(*format)) { + /* skip spaces in format and str */ + while (isspace(*format)) + format++; + while (isspace(*str)) + str++; + } else if (*format == *str) { + /* literal match */ + format++; + str++; + } else { + if (!matches) + matches = EOF; + goto done; + } + } + +done: + return matches; +} + +static __attribute__((unused, format(scanf, 2, 3))) +int sscanf(const char *str, const char *format, ...) +{ + va_list args; + int ret; + + va_start(args, format); + ret = vsscanf(str, format, args); + va_end(args); + return ret; +} + static __attribute__((unused)) void perror(const char *msg) { +#ifdef NOLIBC_IGNORE_ERRNO + fprintf(stderr, "%s%sunknown error\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : ""); +#else fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno); +#endif } static __attribute__((unused)) @@ -378,16 +976,29 @@ int setvbuf(FILE *stream __attribute__((unused)), } static __attribute__((unused)) -const char *strerror(int errno) +int strerror_r(int errnum, char *buf, size_t buflen) { - static char buf[18] = "errno="; + if (buflen < 18) + return ERANGE; - i64toa_r(errno, &buf[6]); - - return buf; + __builtin_memcpy(buf, "errno=", 6); + i64toa_r(errnum, buf + 6); + return 0; } -/* make sure to include all global symbols */ -#include "nolibc.h" +static __attribute__((unused)) +const char *strerror(int errnum) +{ + static char buf[18]; + char *b = buf; + + /* Force gcc to use 'register offset' to access buf[]. */ + _NOLIBC_OPTIMIZER_HIDE_VAR(b); + + /* Use strerror_r() to avoid having the only .data in small programs. */ + strerror_r(errnum, b, sizeof(buf)); + + return b; +} #endif /* _NOLIBC_STDIO_H */ diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 75aa273c23a6..1816c2368b68 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STDLIB_H #define _NOLIBC_STDLIB_H @@ -29,11 +32,30 @@ static __attribute__((unused)) char itoa_buffer[21]; * As much as possible, please keep functions alphabetically sorted. */ +static __inline__ +int abs(int j) +{ + return j >= 0 ? j : -j; +} + +static __inline__ +long labs(long j) +{ + return j >= 0 ? j : -j; +} + +static __inline__ +long long llabs(long long j) +{ + return j >= 0 ? j : -j; +} + /* must be exported, as it's used by libgcc for various divide functions */ +void abort(void); __attribute__((weak,unused,noreturn,section(".text.nolibc_abort"))) void abort(void) { - sys_kill(sys_getpid(), SIGABRT); + _sys_kill(_sys_getpid(), SIGABRT); for (;;); } @@ -78,6 +100,7 @@ void free(void *ptr) munmap(heap, heap->len); } +#ifndef NOLIBC_NO_RUNTIME /* getenv() tries to find the environment variable named <name> in the * environment array pointed to by global variable "environ" which must be * declared as a char **, and must be terminated by a NULL (it is recommended @@ -100,32 +123,7 @@ char *getenv(const char *name) } return NULL; } - -static __attribute__((unused)) -unsigned long getauxval(unsigned long type) -{ - const unsigned long *auxv = _auxv; - unsigned long ret; - - if (!auxv) - return 0; - - while (1) { - if (!auxv[0] && !auxv[1]) { - ret = 0; - break; - } - - if (auxv[0] == type) { - ret = auxv[1]; - break; - } - - auxv += 2; - } - - return ret; -} +#endif /* NOLIBC_NO_RUNTIME */ static __attribute__((unused)) void *malloc(size_t len) @@ -147,9 +145,9 @@ void *malloc(size_t len) static __attribute__((unused)) void *calloc(size_t size, size_t nmemb) { - size_t x = size * nmemb; + size_t x; - if (__builtin_expect(size && ((x / size) != nmemb), 0)) { + if (__builtin_expect(__builtin_mul_overflow(size, nmemb, &x), 0)) { SET_ERRNO(ENOMEM); return NULL; } @@ -190,36 +188,91 @@ void *realloc(void *old_ptr, size_t new_size) return ret; } -/* Converts the unsigned long integer <in> to its hex representation into +/* Converts the unsigned 64bit integer <in> to base <base> ascii into * buffer <buffer>, which must be long enough to store the number and the - * trailing zero (17 bytes for "ffffffffffffffff" or 9 for "ffffffff"). The - * buffer is filled from the first byte, and the number of characters emitted - * (not counting the trailing zero) is returned. The function is constructed - * in a way to optimize the code size and avoid any divide that could add a - * dependency on large external functions. + * trailing zero. The buffer is filled from the first byte, and the number + * of characters emitted (not counting the trailing zero) is returned. + * The function uses 'multiply by reciprocal' for the divisions and + * requires the caller pass the correct reciprocal. + * + * Note that unlike __div64_const32() in asm-generic/div64.h there isn't + * an extra shift done (by ___p), the reciprocal has to be lower resulting + * in a slightly low quotient. + * Keep things simple by correcting for the error. + * This also saves calculating the 'low * low' product (e2 below) which is + * very unlikely to be significant. + * + * Some maths: + * recip = p2 / base - e1; // With e1 < base. + * q = (recip * in - e2) / p2; // With e2 < p2. + * = base / in - (e1 * in + e2) / p2; + * > base / in - (e1 * p2 + p2) / p2; + * = base / in - ((e1 + 1) * p2) / p2; + * > base / in - base; + * So the maximum error is less than 'base'. + * Hence the largest possible digit is '2 * base - 1'. + * For base 10 e1 is 6 and you can get digits of 15 (eg from 2**64-1). + * Error e1 is largest for a base that is a factor of 2**64+1, the smallest is 274177 + * and converting 2**42-1 in base 274177 does generate a digit of 274177+274175. + * This all means only a single correction is needed rather than a loop. + * + * __int128 isn't used for mips because gcc prior to 10.0 will call + * __multi3 for MIPS64r6. The same also happens for SPARC and clang. */ -static __attribute__((unused)) -int utoh_r(unsigned long in, char *buffer) +#define _NOLIBC_U64TOA_RECIP(base) ((base) & 1 ? ~0ull / (base) : (1ull << 63) / ((base) / 2)) +static __attribute__((unused, noinline)) +int _nolibc_u64toa_base(uint64_t in, char *buffer, unsigned int base, uint64_t recip) { - signed char pos = (~0UL > 0xfffffffful) ? 60 : 28; - int digits = 0; - int dig; + unsigned int digits = 0; + unsigned int dig; + uint64_t q; + char *p; + /* Generate least significant digit first */ do { - dig = in >> pos; - in -= (uint64_t)dig << pos; - pos -= 4; - if (dig || digits || pos < 0) { - if (dig > 9) - dig += 'a' - '0' - 10; - buffer[digits++] = '0' + dig; +#if defined(__SIZEOF_INT128__) && !defined(__mips__) && !defined(__sparc__) + q = ((unsigned __int128)in * recip) >> 64; +#else + uint64_t p = (uint32_t)in * (recip >> 32); + q = (in >> 32) * (recip >> 32) + (p >> 32); + p = (uint32_t)p + (in >> 32) * (uint32_t)recip; + q += p >> 32; +#endif + dig = in - q * base; + /* Correct for any rounding errors */ + if (dig >= base) { + dig -= base; + q++; } - } while (pos >= 0); + if (dig > 9) + dig += 'a' - '0' - 10; + buffer[digits++] = '0' + dig; + } while ((in = q)); buffer[digits] = 0; + + /* Order reverse to result */ + for (p = buffer + digits - 1; p > buffer; buffer++, p--) { + dig = *buffer; + *buffer = *p; + *p = dig; + } + return digits; } +/* Converts the unsigned long integer <in> to its hex representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (17 bytes for "ffffffffffffffff" or 9 for "ffffffff"). The + * buffer is filled from the first byte, and the number of characters emitted + * (not counting the trailing zero) is returned. + */ +static __inline__ __attribute__((unused)) +int utoh_r(unsigned long in, char *buffer) +{ + return _nolibc_u64toa_base(in, buffer, 16, _NOLIBC_U64TOA_RECIP(16)); +} + /* converts unsigned long <in> to an hex string using the static itoa_buffer * and returns the pointer to that string. */ @@ -235,30 +288,11 @@ char *utoh(unsigned long in) * trailing zero (21 bytes for 18446744073709551615 in 64-bit, 11 for * 4294967295 in 32-bit). The buffer is filled from the first byte, and the * number of characters emitted (not counting the trailing zero) is returned. - * The function is constructed in a way to optimize the code size and avoid - * any divide that could add a dependency on large external functions. */ -static __attribute__((unused)) +static __inline__ __attribute__((unused)) int utoa_r(unsigned long in, char *buffer) { - unsigned long lim; - int digits = 0; - int pos = (~0UL > 0xfffffffful) ? 19 : 9; - int dig; - - do { - for (dig = 0, lim = 1; dig < pos; dig++) - lim *= 10; - - if (digits || in >= lim || !pos) { - for (dig = 0; in >= lim; dig++) - in -= lim; - buffer[digits++] = '0' + dig; - } - } while (pos--); - - buffer[digits] = 0; - return digits; + return _nolibc_u64toa_base(in, buffer, 10, _NOLIBC_U64TOA_RECIP(10)); } /* Converts the signed long integer <in> to its string representation into @@ -274,7 +308,7 @@ int itoa_r(long in, char *buffer) int len = 0; if (in < 0) { - in = -in; + in = -(unsigned long)in; *(ptr++) = '-'; len++; } @@ -326,34 +360,12 @@ char *utoa(unsigned long in) * buffer <buffer>, which must be long enough to store the number and the * trailing zero (17 bytes for "ffffffffffffffff"). The buffer is filled from * the first byte, and the number of characters emitted (not counting the - * trailing zero) is returned. The function is constructed in a way to optimize - * the code size and avoid any divide that could add a dependency on large - * external functions. + * trailing zero) is returned. */ -static __attribute__((unused)) +static __inline__ __attribute__((unused)) int u64toh_r(uint64_t in, char *buffer) { - signed char pos = 60; - int digits = 0; - int dig; - - do { - if (sizeof(long) >= 8) { - dig = (in >> pos) & 0xF; - } else { - /* 32-bit platforms: avoid a 64-bit shift */ - uint32_t d = (pos >= 32) ? (in >> 32) : in; - dig = (d >> (pos & 31)) & 0xF; - } - if (dig > 9) - dig += 'a' - '0' - 10; - pos -= 4; - if (dig || digits || pos < 0) - buffer[digits++] = '0' + dig; - } while (pos >= 0); - - buffer[digits] = 0; - return digits; + return _nolibc_u64toa_base(in, buffer, 16, _NOLIBC_U64TOA_RECIP(16)); } /* converts uint64_t <in> to an hex string using the static itoa_buffer and @@ -370,31 +382,12 @@ char *u64toh(uint64_t in) * buffer <buffer>, which must be long enough to store the number and the * trailing zero (21 bytes for 18446744073709551615). The buffer is filled from * the first byte, and the number of characters emitted (not counting the - * trailing zero) is returned. The function is constructed in a way to optimize - * the code size and avoid any divide that could add a dependency on large - * external functions. + * trailing zero) is returned. */ -static __attribute__((unused)) +static __inline__ __attribute__((unused)) int u64toa_r(uint64_t in, char *buffer) { - unsigned long long lim; - int digits = 0; - int pos = 19; /* start with the highest possible digit */ - int dig; - - do { - for (dig = 0, lim = 1; dig < pos; dig++) - lim *= 10; - - if (digits || in >= lim || !pos) { - for (dig = 0; in >= lim; dig++) - in -= lim; - buffer[digits++] = '0' + dig; - } - } while (pos--); - - buffer[digits] = 0; - return digits; + return _nolibc_u64toa_base(in, buffer, 10, _NOLIBC_U64TOA_RECIP(10)); } /* Converts the signed 64-bit integer <in> to its string representation into @@ -410,7 +403,7 @@ int i64toa_r(int64_t in, char *buffer) int len = 0; if (in < 0) { - in = -in; + in = -(uint64_t)in; *(ptr++) = '-'; len++; } @@ -547,7 +540,4 @@ uintmax_t strtoumax(const char *nptr, char **endptr, int base) return __strtox(nptr, endptr, base, 0, UINTMAX_MAX); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_STDLIB_H */ diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index 9ec9c24f38c0..4000926f44ac 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STRING_H #define _NOLIBC_STRING_H @@ -32,6 +35,7 @@ int memcmp(const void *s1, const void *s2, size_t n) /* might be ignored by the compiler without -ffreestanding, then found as * missing. */ +void *memmove(void *dst, const void *src, size_t len); __attribute__((weak,unused,section(".text.nolibc_memmove"))) void *memmove(void *dst, const void *src, size_t len) { @@ -56,6 +60,7 @@ void *memmove(void *dst, const void *src, size_t len) #ifndef NOLIBC_ARCH_HAS_MEMCPY /* must be exported, as it's used by libgcc on ARM */ +void *memcpy(void *dst, const void *src, size_t len); __attribute__((weak,unused,section(".text.nolibc_memcpy"))) void *memcpy(void *dst, const void *src, size_t len) { @@ -73,6 +78,7 @@ void *memcpy(void *dst, const void *src, size_t len) /* might be ignored by the compiler without -ffreestanding, then found as * missing. */ +void *memset(void *dst, int b, size_t len); __attribute__((weak,unused,section(".text.nolibc_memset"))) void *memset(void *dst, int b, size_t len) { @@ -87,6 +93,21 @@ void *memset(void *dst, int b, size_t len) } #endif /* #ifndef NOLIBC_ARCH_HAS_MEMSET */ +#ifndef NOLIBC_ARCH_HAS_MEMCHR +static __attribute__((unused)) +void *memchr(const void *s, int c, size_t len) +{ + char *p = (char *)s; + + while (len--) { + if (*p == (char)c) + return p; + p++; + } + return NULL; +} +#endif /* #ifndef NOLIBC_ARCH_HAS_MEMCHR */ + static __attribute__((unused)) char *strchr(const char *s, int c) { @@ -124,6 +145,7 @@ char *strcpy(char *dst, const char *src) * thus itself, hence the asm() statement below that's meant to disable this * confusing practice. */ +size_t strlen(const char *str); __attribute__((weak,unused,section(".text.nolibc_strlen"))) size_t strlen(const char *str) { @@ -285,7 +307,40 @@ char *strrchr(const char *s, int c) return (char *)ret; } -/* make sure to include all global symbols */ -#include "nolibc.h" +static __attribute__((unused)) +char *strstr(const char *haystack, const char *needle) +{ + size_t len_haystack, len_needle; + + len_needle = strlen(needle); + if (!len_needle) + return NULL; + + len_haystack = strlen(haystack); + while (len_haystack >= len_needle) { + if (!memcmp(haystack, needle, len_needle)) + return (char *)haystack; + haystack++; + len_haystack--; + } + + return NULL; +} + +static __attribute__((unused)) +int tolower(int c) +{ + if (c >= 'A' && c <= 'Z') + return c - 'A' + 'a'; + return c; +} + +static __attribute__((unused)) +int toupper(int c) +{ + if (c >= 'a' && c <= 'z') + return c - 'a' + 'A'; + return c; +} #endif /* _NOLIBC_STRING_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index d4a5c2399a66..548f94d96ed2 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -4,28 +4,27 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_SYS_H #define _NOLIBC_SYS_H #include "std.h" /* system includes */ -#include <asm/unistd.h> -#include <asm/signal.h> /* for SIGCHLD */ -#include <asm/ioctls.h> -#include <asm/mman.h> +#include <linux/unistd.h> +#include <linux/signal.h> /* for SIGCHLD */ +#include <linux/termios.h> +#include <linux/mman.h> #include <linux/fs.h> #include <linux/loop.h> #include <linux/time.h> #include <linux/auxvec.h> #include <linux/fcntl.h> /* for O_* and AT_* */ +#include <linux/sched.h> /* for CLONE_* */ #include <linux/stat.h> /* for statx() */ -#include <linux/prctl.h> -#include <linux/resource.h> -#include <linux/utsname.h> -#include <linux/signal.h> -#include "arch.h" #include "errno.h" #include "stdarg.h" #include "types.h" @@ -46,16 +45,41 @@ : __sysret_arg; /* return original value */ \ }) -/* Syscall ENOSYS helper: Avoids unused-parameter warnings and provides a - * debugging hook. +/* Syscall ENOSYS helper: Avoids unused-parameter warnings, provides compile + * time validation and a debugging hook. */ +#if defined(NOLIBC_COMPILE_TIME_ENOSYS) static __inline__ int __nolibc_enosys(const char *syscall, ...) { (void)syscall; return -ENOSYS; } +#elif __nolibc_has_attribute(error) +__attribute__((error("system call not implemented"))) +extern int __nolibc_enosys(const char *syscall, ...); + +#else +static __inline__ int __nolibc_enosys(const char *syscall, ...) +{ + extern int __nolibc_enosys_error; + (void)syscall; + + return __nolibc_enosys_error; +} +#endif + + +/* + * Helper for 32-bit machines where a 64-bit syscall arg needs to be split into + * two 32-bit parts while making sure the order of the low/high parts are correct + * for the endianness: + * __NOLIBC_LLARGPART(x, 0), __NOLIBC_LLARGPART(x, 1) + */ +#define __NOLIBC_LLARGPART(_arg, _part) \ + (((union { long long ll; long l[2]; }) { .ll = _arg }).l[_part]) + /* Functions in this file only describe syscalls. They're declared static so * that the compiler usually decides to inline them while still being allowed @@ -64,7 +88,7 @@ static __inline__ int __nolibc_enosys(const char *syscall, ...) * - the "internal" ones, which matches the raw syscall interface at the * kernel level, which may sometimes slightly differ from the documented * libc-level ones. For example most of them return either a valid value - * or -errno. All of these are prefixed with "sys_". They may be called + * or -errno. All of these are prefixed with "_sys_". They may be called * by non-portable applications if desired. * * - the "exported" ones, whose interface must closely match the one @@ -86,15 +110,15 @@ static __inline__ int __nolibc_enosys(const char *syscall, ...) */ static __attribute__((unused)) -void *sys_brk(void *addr) +void *_sys_brk(void *addr) { - return (void *)my_syscall1(__NR_brk, addr); + return (void *)(unsigned long)__nolibc_syscall1(__NR_brk, addr); } static __attribute__((unused)) int brk(void *addr) { - void *ret = sys_brk(addr); + void *ret = _sys_brk(addr); if (!ret) { SET_ERRNO(ENOMEM); @@ -107,9 +131,9 @@ static __attribute__((unused)) void *sbrk(intptr_t inc) { /* first call to find current end */ - void *ret = sys_brk(0); + void *ret = _sys_brk(NULL); - if (ret && sys_brk(ret + inc) == ret + inc) + if (ret && _sys_brk(ret + inc) == ret + inc) return ret + inc; SET_ERRNO(ENOMEM); @@ -119,18 +143,31 @@ void *sbrk(intptr_t inc) /* * int chdir(const char *path); + * int fchdir(int fildes); */ static __attribute__((unused)) -int sys_chdir(const char *path) +int _sys_chdir(const char *path) { - return my_syscall1(__NR_chdir, path); + return __nolibc_syscall1(__NR_chdir, path); } static __attribute__((unused)) int chdir(const char *path) { - return __sysret(sys_chdir(path)); + return __sysret(_sys_chdir(path)); +} + +static __attribute__((unused)) +int _sys_fchdir(int fildes) +{ + return __nolibc_syscall1(__NR_fchdir, fildes); +} + +static __attribute__((unused)) +int fchdir(int fildes) +{ + return __sysret(_sys_fchdir(fildes)); } @@ -139,21 +176,19 @@ int chdir(const char *path) */ static __attribute__((unused)) -int sys_chmod(const char *path, mode_t mode) +int _sys_chmod(const char *path, mode_t mode) { -#ifdef __NR_fchmodat - return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); -#elif defined(__NR_chmod) - return my_syscall2(__NR_chmod, path, mode); +#if defined(__NR_fchmodat) + return __nolibc_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); #else - return __nolibc_enosys(__func__, path, mode); + return __nolibc_syscall2(__NR_chmod, path, mode); #endif } static __attribute__((unused)) int chmod(const char *path, mode_t mode) { - return __sysret(sys_chmod(path, mode)); + return __sysret(_sys_chmod(path, mode)); } @@ -162,21 +197,19 @@ int chmod(const char *path, mode_t mode) */ static __attribute__((unused)) -int sys_chown(const char *path, uid_t owner, gid_t group) +int _sys_chown(const char *path, uid_t owner, gid_t group) { -#ifdef __NR_fchownat - return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); -#elif defined(__NR_chown) - return my_syscall3(__NR_chown, path, owner, group); +#if defined(__NR_fchownat) + return __nolibc_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); #else - return __nolibc_enosys(__func__, path, owner, group); + return __nolibc_syscall3(__NR_chown, path, owner, group); #endif } static __attribute__((unused)) int chown(const char *path, uid_t owner, gid_t group) { - return __sysret(sys_chown(path, owner, group)); + return __sysret(_sys_chown(path, owner, group)); } @@ -185,15 +218,15 @@ int chown(const char *path, uid_t owner, gid_t group) */ static __attribute__((unused)) -int sys_chroot(const char *path) +int _sys_chroot(const char *path) { - return my_syscall1(__NR_chroot, path); + return __nolibc_syscall1(__NR_chroot, path); } static __attribute__((unused)) int chroot(const char *path) { - return __sysret(sys_chroot(path)); + return __sysret(_sys_chroot(path)); } @@ -202,15 +235,15 @@ int chroot(const char *path) */ static __attribute__((unused)) -int sys_close(int fd) +int _sys_close(int fd) { - return my_syscall1(__NR_close, fd); + return __nolibc_syscall1(__NR_close, fd); } static __attribute__((unused)) int close(int fd) { - return __sysret(sys_close(fd)); + return __sysret(_sys_close(fd)); } @@ -219,15 +252,15 @@ int close(int fd) */ static __attribute__((unused)) -int sys_dup(int fd) +int _sys_dup(int fd) { - return my_syscall1(__NR_dup, fd); + return __nolibc_syscall1(__NR_dup, fd); } static __attribute__((unused)) int dup(int fd) { - return __sysret(sys_dup(fd)); + return __sysret(_sys_dup(fd)); } @@ -236,21 +269,32 @@ int dup(int fd) */ static __attribute__((unused)) -int sys_dup2(int old, int new) +int _sys_dup2(int old, int new) { -#ifdef __NR_dup3 - return my_syscall3(__NR_dup3, old, new, 0); -#elif defined(__NR_dup2) - return my_syscall2(__NR_dup2, old, new); +#if defined(__NR_dup3) + int ret, nr_fcntl; + +#ifdef __NR_fcntl64 + nr_fcntl = __NR_fcntl64; #else - return __nolibc_enosys(__func__, old, new); + nr_fcntl = __NR_fcntl; +#endif + + if (old == new) { + ret = __nolibc_syscall2(nr_fcntl, old, F_GETFD); + return ret < 0 ? ret : old; + } + + return __nolibc_syscall3(__NR_dup3, old, new, 0); +#else + return __nolibc_syscall2(__NR_dup2, old, new); #endif } static __attribute__((unused)) int dup2(int old, int new) { - return __sysret(sys_dup2(old, new)); + return __sysret(_sys_dup2(old, new)); } @@ -258,17 +302,17 @@ int dup2(int old, int new) * int dup3(int old, int new, int flags); */ -#ifdef __NR_dup3 +#if defined(__NR_dup3) static __attribute__((unused)) -int sys_dup3(int old, int new, int flags) +int _sys_dup3(int old, int new, int flags) { - return my_syscall3(__NR_dup3, old, new, flags); + return __nolibc_syscall3(__NR_dup3, old, new, flags); } static __attribute__((unused)) int dup3(int old, int new, int flags) { - return __sysret(sys_dup3(old, new, flags)); + return __sysret(_sys_dup3(old, new, flags)); } #endif @@ -278,15 +322,15 @@ int dup3(int old, int new, int flags) */ static __attribute__((unused)) -int sys_execve(const char *filename, char *const argv[], char *const envp[]) +int _sys_execve(const char *filename, char *const argv[], char *const envp[]) { - return my_syscall3(__NR_execve, filename, argv, envp); + return __nolibc_syscall3(__NR_execve, filename, argv, envp); } static __attribute__((unused)) int execve(const char *filename, char *const argv[], char *const envp[]) { - return __sysret(sys_execve(filename, argv, envp)); + return __sysret(_sys_execve(filename, argv, envp)); } @@ -295,16 +339,22 @@ int execve(const char *filename, char *const argv[], char *const envp[]) */ static __attribute__((noreturn,unused)) -void sys_exit(int status) +void _sys_exit(int status) { - my_syscall1(__NR_exit, status & 255); + __nolibc_syscall1(__NR_exit, status & 255); while(1); /* shut the "noreturn" warnings. */ } static __attribute__((noreturn,unused)) +void _exit(int status) +{ + _sys_exit(status); +} + +static __attribute__((noreturn,unused)) void exit(int status) { - sys_exit(status); + _exit(status); } @@ -312,20 +362,18 @@ void exit(int status) * pid_t fork(void); */ -#ifndef sys_fork +#ifndef _sys_fork static __attribute__((unused)) -pid_t sys_fork(void) +pid_t _sys_fork(void) { -#ifdef __NR_clone +#if defined(__NR_clone) /* note: some archs only have clone() and not fork(). Different archs * have a different API, but most archs have the flags on first arg and * will not use the rest with no other flag. */ - return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0); -#elif defined(__NR_fork) - return my_syscall0(__NR_fork); + return __nolibc_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0); #else - return __nolibc_enosys(__func__); + return __nolibc_syscall0(__NR_fork); #endif } #endif @@ -333,24 +381,42 @@ pid_t sys_fork(void) static __attribute__((unused)) pid_t fork(void) { - return __sysret(sys_fork()); + return __sysret(_sys_fork()); } +#ifndef _sys_vfork +static __attribute__((unused)) +pid_t _sys_vfork(void) +{ +#if defined(__NR_clone) + /* See the note in _sys_fork(). */ + return __nolibc_syscall5(__NR_clone, CLONE_VM | CLONE_VFORK | SIGCHLD, 0, 0, 0, 0); +#elif defined(__NR_vfork) + return __nolibc_syscall0(__NR_vfork); +#endif +} +#endif + +static __attribute__((unused)) +pid_t vfork(void) +{ + return __sysret(_sys_vfork()); +} /* * int fsync(int fd); */ static __attribute__((unused)) -int sys_fsync(int fd) +int _sys_fsync(int fd) { - return my_syscall1(__NR_fsync, fd); + return __nolibc_syscall1(__NR_fsync, fd); } static __attribute__((unused)) int fsync(int fd) { - return __sysret(sys_fsync(fd)); + return __sysret(_sys_fsync(fd)); } @@ -359,15 +425,15 @@ int fsync(int fd) */ static __attribute__((unused)) -int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) +int _sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) { - return my_syscall3(__NR_getdents64, fd, dirp, count); + return __nolibc_syscall3(__NR_getdents64, fd, dirp, count); } static __attribute__((unused)) int getdents64(int fd, struct linux_dirent64 *dirp, int count) { - return __sysret(sys_getdents64(fd, dirp, count)); + return __sysret(_sys_getdents64(fd, dirp, count)); } @@ -376,19 +442,19 @@ int getdents64(int fd, struct linux_dirent64 *dirp, int count) */ static __attribute__((unused)) -uid_t sys_geteuid(void) +uid_t _sys_geteuid(void) { -#ifdef __NR_geteuid32 - return my_syscall0(__NR_geteuid32); +#if defined(__NR_geteuid32) + return __nolibc_syscall0(__NR_geteuid32); #else - return my_syscall0(__NR_geteuid); + return __nolibc_syscall0(__NR_geteuid); #endif } static __attribute__((unused)) uid_t geteuid(void) { - return sys_geteuid(); + return _sys_geteuid(); } @@ -397,15 +463,15 @@ uid_t geteuid(void) */ static __attribute__((unused)) -pid_t sys_getpgid(pid_t pid) +pid_t _sys_getpgid(pid_t pid) { - return my_syscall1(__NR_getpgid, pid); + return __nolibc_syscall1(__NR_getpgid, pid); } static __attribute__((unused)) pid_t getpgid(pid_t pid) { - return __sysret(sys_getpgid(pid)); + return __sysret(_sys_getpgid(pid)); } @@ -414,15 +480,15 @@ pid_t getpgid(pid_t pid) */ static __attribute__((unused)) -pid_t sys_getpgrp(void) +pid_t _sys_getpgrp(void) { - return sys_getpgid(0); + return _sys_getpgid(0); } static __attribute__((unused)) pid_t getpgrp(void) { - return sys_getpgrp(); + return _sys_getpgrp(); } @@ -431,15 +497,15 @@ pid_t getpgrp(void) */ static __attribute__((unused)) -pid_t sys_getpid(void) +pid_t _sys_getpid(void) { - return my_syscall0(__NR_getpid); + return __nolibc_syscall0(__NR_getpid); } static __attribute__((unused)) pid_t getpid(void) { - return sys_getpid(); + return _sys_getpid(); } @@ -448,15 +514,15 @@ pid_t getpid(void) */ static __attribute__((unused)) -pid_t sys_getppid(void) +pid_t _sys_getppid(void) { - return my_syscall0(__NR_getppid); + return __nolibc_syscall0(__NR_getppid); } static __attribute__((unused)) pid_t getppid(void) { - return sys_getppid(); + return _sys_getppid(); } @@ -465,17 +531,18 @@ pid_t getppid(void) */ static __attribute__((unused)) -pid_t sys_gettid(void) +pid_t _sys_gettid(void) { - return my_syscall0(__NR_gettid); + return __nolibc_syscall0(__NR_gettid); } static __attribute__((unused)) pid_t gettid(void) { - return sys_gettid(); + return _sys_gettid(); } +#ifndef NOLIBC_NO_RUNTIME static unsigned long getauxval(unsigned long key); /* @@ -487,80 +554,43 @@ int getpagesize(void) { return __sysret((int)getauxval(AT_PAGESZ) ?: -ENOENT); } - - -/* - * int gettimeofday(struct timeval *tv, struct timezone *tz); - */ - -static __attribute__((unused)) -int sys_gettimeofday(struct timeval *tv, struct timezone *tz) -{ -#ifdef __NR_gettimeofday - return my_syscall2(__NR_gettimeofday, tv, tz); -#else - return __nolibc_enosys(__func__, tv, tz); -#endif -} - -static __attribute__((unused)) -int gettimeofday(struct timeval *tv, struct timezone *tz) -{ - return __sysret(sys_gettimeofday(tv, tz)); -} - +#endif /* NOLIBC_NO_RUNTIME */ /* * uid_t getuid(void); */ static __attribute__((unused)) -uid_t sys_getuid(void) +uid_t _sys_getuid(void) { -#ifdef __NR_getuid32 - return my_syscall0(__NR_getuid32); +#if defined(__NR_getuid32) + return __nolibc_syscall0(__NR_getuid32); #else - return my_syscall0(__NR_getuid); + return __nolibc_syscall0(__NR_getuid); #endif } static __attribute__((unused)) uid_t getuid(void) { - return sys_getuid(); + return _sys_getuid(); } /* - * int ioctl(int fd, unsigned long req, void *value); - */ - -static __attribute__((unused)) -int sys_ioctl(int fd, unsigned long req, void *value) -{ - return my_syscall3(__NR_ioctl, fd, req, value); -} - -static __attribute__((unused)) -int ioctl(int fd, unsigned long req, void *value) -{ - return __sysret(sys_ioctl(fd, req, value)); -} - -/* * int kill(pid_t pid, int signal); */ static __attribute__((unused)) -int sys_kill(pid_t pid, int signal) +int _sys_kill(pid_t pid, int signal) { - return my_syscall2(__NR_kill, pid, signal); + return __nolibc_syscall2(__NR_kill, pid, signal); } static __attribute__((unused)) int kill(pid_t pid, int signal) { - return __sysret(sys_kill(pid, signal)); + return __sysret(_sys_kill(pid, signal)); } @@ -569,21 +599,19 @@ int kill(pid_t pid, int signal) */ static __attribute__((unused)) -int sys_link(const char *old, const char *new) +int _sys_link(const char *old, const char *new) { -#ifdef __NR_linkat - return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); -#elif defined(__NR_link) - return my_syscall2(__NR_link, old, new); +#if defined(__NR_linkat) + return __nolibc_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); #else - return __nolibc_enosys(__func__, old, new); + return __nolibc_syscall2(__NR_link, old, new); #endif } static __attribute__((unused)) int link(const char *old, const char *new) { - return __sysret(sys_link(old, new)); + return __sysret(_sys_link(old, new)); } @@ -592,19 +620,35 @@ int link(const char *old, const char *new) */ static __attribute__((unused)) -off_t sys_lseek(int fd, off_t offset, int whence) +off_t _sys_lseek(int fd, off_t offset, int whence) { -#ifdef __NR_lseek - return my_syscall3(__NR_lseek, fd, offset, whence); +#if defined(__NR_llseek) || defined(__NR__llseek) + __kernel_loff_t loff = 0; + int ret, nr_llseek; + off_t result; + +#if defined(__NR_llseek) + nr_llseek = __NR_llseek; #else - return __nolibc_enosys(__func__, fd, offset, whence); + nr_llseek = __NR__llseek; +#endif + + ret = __nolibc_syscall5(nr_llseek, fd, offset >> 32, (uint32_t)offset, &loff, whence); + if (ret < 0) + result = ret; + else + result = loff; + + return result; +#else + return __nolibc_syscall3(__NR_lseek, fd, offset, whence); #endif } static __attribute__((unused)) off_t lseek(int fd, off_t offset, int whence) { - return __sysret(sys_lseek(fd, offset, whence)); + return __sysret(_sys_lseek(fd, offset, whence)); } @@ -613,21 +657,19 @@ off_t lseek(int fd, off_t offset, int whence) */ static __attribute__((unused)) -int sys_mkdir(const char *path, mode_t mode) +int _sys_mkdir(const char *path, mode_t mode) { -#ifdef __NR_mkdirat - return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); -#elif defined(__NR_mkdir) - return my_syscall2(__NR_mkdir, path, mode); +#if defined(__NR_mkdirat) + return __nolibc_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); #else - return __nolibc_enosys(__func__, path, mode); + return __nolibc_syscall2(__NR_mkdir, path, mode); #endif } static __attribute__((unused)) int mkdir(const char *path, mode_t mode) { - return __sysret(sys_mkdir(path, mode)); + return __sysret(_sys_mkdir(path, mode)); } /* @@ -635,21 +677,19 @@ int mkdir(const char *path, mode_t mode) */ static __attribute__((unused)) -int sys_rmdir(const char *path) +int _sys_rmdir(const char *path) { -#ifdef __NR_rmdir - return my_syscall1(__NR_rmdir, path); -#elif defined(__NR_unlinkat) - return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); +#if defined(__NR_rmdir) + return __nolibc_syscall1(__NR_rmdir, path); #else - return __nolibc_enosys(__func__, path); + return __nolibc_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); #endif } static __attribute__((unused)) int rmdir(const char *path) { - return __sysret(sys_rmdir(path)); + return __sysret(_sys_rmdir(path)); } @@ -658,121 +698,19 @@ int rmdir(const char *path) */ static __attribute__((unused)) -long sys_mknod(const char *path, mode_t mode, dev_t dev) +long _sys_mknod(const char *path, mode_t mode, dev_t dev) { -#ifdef __NR_mknodat - return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); -#elif defined(__NR_mknod) - return my_syscall3(__NR_mknod, path, mode, dev); +#if defined(__NR_mknodat) + return __nolibc_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); #else - return __nolibc_enosys(__func__, path, mode, dev); + return __nolibc_syscall3(__NR_mknod, path, mode, dev); #endif } static __attribute__((unused)) int mknod(const char *path, mode_t mode, dev_t dev) { - return __sysret(sys_mknod(path, mode, dev)); -} - -#ifndef sys_mmap -static __attribute__((unused)) -void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, - off_t offset) -{ - int n; - -#if defined(__NR_mmap2) - n = __NR_mmap2; - offset >>= 12; -#else - n = __NR_mmap; -#endif - - return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset); -} -#endif - -/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret() - * which returns -1 upon error and still satisfy user land that checks for - * MAP_FAILED. - */ - -static __attribute__((unused)) -void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) -{ - void *ret = sys_mmap(addr, length, prot, flags, fd, offset); - - if ((unsigned long)ret >= -4095UL) { - SET_ERRNO(-(long)ret); - ret = MAP_FAILED; - } - return ret; -} - -static __attribute__((unused)) -int sys_munmap(void *addr, size_t length) -{ - return my_syscall2(__NR_munmap, addr, length); -} - -static __attribute__((unused)) -int munmap(void *addr, size_t length) -{ - return __sysret(sys_munmap(addr, length)); -} - -/* - * int mount(const char *source, const char *target, - * const char *fstype, unsigned long flags, - * const void *data); - */ -static __attribute__((unused)) -int sys_mount(const char *src, const char *tgt, const char *fst, - unsigned long flags, const void *data) -{ - return my_syscall5(__NR_mount, src, tgt, fst, flags, data); -} - -static __attribute__((unused)) -int mount(const char *src, const char *tgt, - const char *fst, unsigned long flags, - const void *data) -{ - return __sysret(sys_mount(src, tgt, fst, flags, data)); -} - - -/* - * int open(const char *path, int flags[, mode_t mode]); - */ - -static __attribute__((unused)) -int sys_open(const char *path, int flags, mode_t mode) -{ -#ifdef __NR_openat - return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); -#elif defined(__NR_open) - return my_syscall3(__NR_open, path, flags, mode); -#else - return __nolibc_enosys(__func__, path, flags, mode); -#endif -} - -static __attribute__((unused)) -int open(const char *path, int flags, ...) -{ - mode_t mode = 0; - - if (flags & O_CREAT) { - va_list args; - - va_start(args, flags); - mode = va_arg(args, int); - va_end(args); - } - - return __sysret(sys_open(path, flags, mode)); + return __sysret(_sys_mknod(path, mode, dev)); } @@ -782,15 +720,15 @@ int open(const char *path, int flags, ...) */ static __attribute__((unused)) -int sys_pipe2(int pipefd[2], int flags) +int _sys_pipe2(int pipefd[2], int flags) { - return my_syscall2(__NR_pipe2, pipefd, flags); + return __nolibc_syscall2(__NR_pipe2, pipefd, flags); } static __attribute__((unused)) int pipe2(int pipefd[2], int flags) { - return __sysret(sys_pipe2(pipefd, flags)); + return __sysret(_sys_pipe2(pipefd, flags)); } static __attribute__((unused)) @@ -801,68 +739,19 @@ int pipe(int pipefd[2]) /* - * int prctl(int option, unsigned long arg2, unsigned long arg3, - * unsigned long arg4, unsigned long arg5); - */ - -static __attribute__((unused)) -int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) -{ - return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5); -} - -static __attribute__((unused)) -int prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) -{ - return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5)); -} - - -/* * int pivot_root(const char *new, const char *old); */ static __attribute__((unused)) -int sys_pivot_root(const char *new, const char *old) +int _sys_pivot_root(const char *new, const char *old) { - return my_syscall2(__NR_pivot_root, new, old); + return __nolibc_syscall2(__NR_pivot_root, new, old); } static __attribute__((unused)) int pivot_root(const char *new, const char *old) { - return __sysret(sys_pivot_root(new, old)); -} - - -/* - * int poll(struct pollfd *fds, int nfds, int timeout); - */ - -static __attribute__((unused)) -int sys_poll(struct pollfd *fds, int nfds, int timeout) -{ -#if defined(__NR_ppoll) - struct timespec t; - - if (timeout >= 0) { - t.tv_sec = timeout / 1000; - t.tv_nsec = (timeout % 1000) * 1000000; - } - return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); -#elif defined(__NR_poll) - return my_syscall3(__NR_poll, fds, nfds, timeout); -#else - return __nolibc_enosys(__func__, fds, nfds, timeout); -#endif -} - -static __attribute__((unused)) -int poll(struct pollfd *fds, int nfds, int timeout) -{ - return __sysret(sys_poll(fds, nfds, timeout)); + return __sysret(_sys_pivot_root(new, old)); } @@ -871,70 +760,15 @@ int poll(struct pollfd *fds, int nfds, int timeout) */ static __attribute__((unused)) -ssize_t sys_read(int fd, void *buf, size_t count) +ssize_t _sys_read(int fd, void *buf, size_t count) { - return my_syscall3(__NR_read, fd, buf, count); + return __nolibc_syscall3(__NR_read, fd, buf, count); } static __attribute__((unused)) ssize_t read(int fd, void *buf, size_t count) { - return __sysret(sys_read(fd, buf, count)); -} - - -/* - * int reboot(int cmd); - * <cmd> is among LINUX_REBOOT_CMD_* - */ - -static __attribute__((unused)) -ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) -{ - return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); -} - -static __attribute__((unused)) -int reboot(int cmd) -{ - return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0)); -} - - -/* - * int getrlimit(int resource, struct rlimit *rlim); - * int setrlimit(int resource, const struct rlimit *rlim); - */ - -static __attribute__((unused)) -int sys_prlimit64(pid_t pid, int resource, - const struct rlimit64 *new_limit, struct rlimit64 *old_limit) -{ - return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit); -} - -static __attribute__((unused)) -int getrlimit(int resource, struct rlimit *rlim) -{ - struct rlimit64 rlim64; - int ret; - - ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64)); - rlim->rlim_cur = rlim64.rlim_cur; - rlim->rlim_max = rlim64.rlim_max; - - return ret; -} - -static __attribute__((unused)) -int setrlimit(int resource, const struct rlimit *rlim) -{ - struct rlimit64 rlim64 = { - .rlim_cur = rlim->rlim_cur, - .rlim_max = rlim->rlim_max, - }; - - return __sysret(sys_prlimit64(0, resource, &rlim64, NULL)); + return __sysret(_sys_read(fd, buf, count)); } @@ -943,54 +777,15 @@ int setrlimit(int resource, const struct rlimit *rlim) */ static __attribute__((unused)) -int sys_sched_yield(void) +int _sys_sched_yield(void) { - return my_syscall0(__NR_sched_yield); + return __nolibc_syscall0(__NR_sched_yield); } static __attribute__((unused)) int sched_yield(void) { - return __sysret(sys_sched_yield()); -} - - -/* - * int select(int nfds, fd_set *read_fds, fd_set *write_fds, - * fd_set *except_fds, struct timeval *timeout); - */ - -static __attribute__((unused)) -int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) -{ -#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) - struct sel_arg_struct { - unsigned long n; - fd_set *r, *w, *e; - struct timeval *t; - } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; - return my_syscall1(__NR_select, &arg); -#elif defined(__NR__newselect) - return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); -#elif defined(__NR_select) - return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout); -#elif defined(__NR_pselect6) - struct timespec t; - - if (timeout) { - t.tv_sec = timeout->tv_sec; - t.tv_nsec = timeout->tv_usec * 1000; - } - return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); -#else - return __nolibc_enosys(__func__, nfds, rfds, wfds, efds, timeout); -#endif -} - -static __attribute__((unused)) -int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) -{ - return __sysret(sys_select(nfds, rfds, wfds, efds, timeout)); + return __sysret(_sys_sched_yield()); } @@ -999,88 +794,42 @@ int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *t */ static __attribute__((unused)) -int sys_setpgid(pid_t pid, pid_t pgid) +int _sys_setpgid(pid_t pid, pid_t pgid) { - return my_syscall2(__NR_setpgid, pid, pgid); + return __nolibc_syscall2(__NR_setpgid, pid, pgid); } static __attribute__((unused)) int setpgid(pid_t pid, pid_t pgid) { - return __sysret(sys_setpgid(pid, pgid)); + return __sysret(_sys_setpgid(pid, pgid)); } - /* - * pid_t setsid(void); + * pid_t setpgrp(void) */ static __attribute__((unused)) -pid_t sys_setsid(void) +pid_t setpgrp(void) { - return my_syscall0(__NR_setsid); + return setpgid(0, 0); } -static __attribute__((unused)) -pid_t setsid(void) -{ - return __sysret(sys_setsid()); -} /* - * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); - * int stat(const char *path, struct stat *buf); + * pid_t setsid(void); */ static __attribute__((unused)) -int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +pid_t _sys_setsid(void) { -#ifdef __NR_statx - return my_syscall5(__NR_statx, fd, path, flags, mask, buf); -#else - return __nolibc_enosys(__func__, fd, path, flags, mask, buf); -#endif + return __nolibc_syscall0(__NR_setsid); } static __attribute__((unused)) -int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) -{ - return __sysret(sys_statx(fd, path, flags, mask, buf)); -} - - -static __attribute__((unused)) -int stat(const char *path, struct stat *buf) +pid_t setsid(void) { - struct statx statx; - long ret; - - ret = __sysret(sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); - if (ret == -1) - return ret; - - buf->st_dev = ((statx.stx_dev_minor & 0xff) - | (statx.stx_dev_major << 8) - | ((statx.stx_dev_minor & ~0xff) << 12)); - buf->st_ino = statx.stx_ino; - buf->st_mode = statx.stx_mode; - buf->st_nlink = statx.stx_nlink; - buf->st_uid = statx.stx_uid; - buf->st_gid = statx.stx_gid; - buf->st_rdev = ((statx.stx_rdev_minor & 0xff) - | (statx.stx_rdev_major << 8) - | ((statx.stx_rdev_minor & ~0xff) << 12)); - buf->st_size = statx.stx_size; - buf->st_blksize = statx.stx_blksize; - buf->st_blocks = statx.stx_blocks; - buf->st_atim.tv_sec = statx.stx_atime.tv_sec; - buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec; - buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec; - buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec; - buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec; - buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec; - - return 0; + return __sysret(_sys_setsid()); } @@ -1089,21 +838,19 @@ int stat(const char *path, struct stat *buf) */ static __attribute__((unused)) -int sys_symlink(const char *old, const char *new) +int _sys_symlink(const char *old, const char *new) { -#ifdef __NR_symlinkat - return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); -#elif defined(__NR_symlink) - return my_syscall2(__NR_symlink, old, new); +#if defined(__NR_symlinkat) + return __nolibc_syscall3(__NR_symlinkat, old, AT_FDCWD, new); #else - return __nolibc_enosys(__func__, old, new); + return __nolibc_syscall2(__NR_symlink, old, new); #endif } static __attribute__((unused)) int symlink(const char *old, const char *new) { - return __sysret(sys_symlink(old, new)); + return __sysret(_sys_symlink(old, new)); } @@ -1112,15 +859,15 @@ int symlink(const char *old, const char *new) */ static __attribute__((unused)) -mode_t sys_umask(mode_t mode) +mode_t _sys_umask(mode_t mode) { - return my_syscall1(__NR_umask, mode); + return __nolibc_syscall1(__NR_umask, mode); } static __attribute__((unused)) mode_t umask(mode_t mode) { - return sys_umask(mode); + return _sys_umask(mode); } @@ -1129,41 +876,15 @@ mode_t umask(mode_t mode) */ static __attribute__((unused)) -int sys_umount2(const char *path, int flags) +int _sys_umount2(const char *path, int flags) { - return my_syscall2(__NR_umount2, path, flags); + return __nolibc_syscall2(__NR_umount2, path, flags); } static __attribute__((unused)) int umount2(const char *path, int flags) { - return __sysret(sys_umount2(path, flags)); -} - - -/* - * int uname(struct utsname *buf); - */ - -struct utsname { - char sysname[65]; - char nodename[65]; - char release[65]; - char version[65]; - char machine[65]; - char domainname[65]; -}; - -static __attribute__((unused)) -int sys_uname(struct utsname *buf) -{ - return my_syscall1(__NR_uname, buf); -} - -static __attribute__((unused)) -int uname(struct utsname *buf) -{ - return __sysret(sys_uname(buf)); + return __sysret(_sys_umount2(path, flags)); } @@ -1172,74 +893,19 @@ int uname(struct utsname *buf) */ static __attribute__((unused)) -int sys_unlink(const char *path) +int _sys_unlink(const char *path) { -#ifdef __NR_unlinkat - return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); -#elif defined(__NR_unlink) - return my_syscall1(__NR_unlink, path); +#if defined(__NR_unlinkat) + return __nolibc_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); #else - return __nolibc_enosys(__func__, path); + return __nolibc_syscall1(__NR_unlink, path); #endif } static __attribute__((unused)) int unlink(const char *path) { - return __sysret(sys_unlink(path)); -} - - -/* - * pid_t wait(int *status); - * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); - * pid_t waitpid(pid_t pid, int *status, int options); - */ - -static __attribute__((unused)) -pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ -#ifdef __NR_wait4 - return my_syscall4(__NR_wait4, pid, status, options, rusage); -#else - return __nolibc_enosys(__func__, pid, status, options, rusage); -#endif -} - -static __attribute__((unused)) -pid_t wait(int *status) -{ - return __sysret(sys_wait4(-1, status, 0, NULL)); -} - -static __attribute__((unused)) -pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ - return __sysret(sys_wait4(pid, status, options, rusage)); -} - - -static __attribute__((unused)) -pid_t waitpid(pid_t pid, int *status, int options) -{ - return __sysret(sys_wait4(pid, status, options, NULL)); -} - - -/* - * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); - */ - -static __attribute__((unused)) -int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) -{ - return my_syscall5(__NR_waitid, which, pid, infop, options, rusage); -} - -static __attribute__((unused)) -int waitid(int which, pid_t pid, siginfo_t *infop, int options) -{ - return __sysret(sys_waitid(which, pid, infop, options, NULL)); + return __sysret(_sys_unlink(path)); } @@ -1248,15 +914,15 @@ int waitid(int which, pid_t pid, siginfo_t *infop, int options) */ static __attribute__((unused)) -ssize_t sys_write(int fd, const void *buf, size_t count) +ssize_t _sys_write(int fd, const void *buf, size_t count) { - return my_syscall3(__NR_write, fd, buf, count); + return __nolibc_syscall3(__NR_write, fd, buf, count); } static __attribute__((unused)) ssize_t write(int fd, const void *buf, size_t count) { - return __sysret(sys_write(fd, buf, count)); + return __sysret(_sys_write(fd, buf, count)); } @@ -1265,18 +931,15 @@ ssize_t write(int fd, const void *buf, size_t count) */ static __attribute__((unused)) -int sys_memfd_create(const char *name, unsigned int flags) +int _sys_memfd_create(const char *name, unsigned int flags) { - return my_syscall2(__NR_memfd_create, name, flags); + return __nolibc_syscall2(__NR_memfd_create, name, flags); } static __attribute__((unused)) int memfd_create(const char *name, unsigned int flags) { - return __sysret(sys_memfd_create(name, flags)); + return __sysret(_sys_memfd_create(name, flags)); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_SYS_H */ diff --git a/tools/include/nolibc/sys/auxv.h b/tools/include/nolibc/sys/auxv.h new file mode 100644 index 000000000000..0e98325e7347 --- /dev/null +++ b/tools/include/nolibc/sys/auxv.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * auxv definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_AUXV_H +#define _NOLIBC_SYS_AUXV_H + +#ifndef NOLIBC_NO_RUNTIME + +#include "../crt.h" + +static __attribute__((unused)) +unsigned long getauxval(unsigned long type) +{ + const unsigned long *auxv = _auxv; + unsigned long ret; + + if (!auxv) + return 0; + + while (1) { + if (!auxv[0] && !auxv[1]) { + ret = 0; + break; + } + + if (auxv[0] == type) { + ret = auxv[1]; + break; + } + + auxv += 2; + } + + return ret; +} + +#endif /* NOLIBC_NO_RUNTIME */ +#endif /* _NOLIBC_SYS_AUXV_H */ diff --git a/tools/include/nolibc/sys/ioctl.h b/tools/include/nolibc/sys/ioctl.h new file mode 100644 index 000000000000..f062a7b806cf --- /dev/null +++ b/tools/include/nolibc/sys/ioctl.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Ioctl definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_IOCTL_H +#define _NOLIBC_SYS_IOCTL_H + +#include "../sys.h" + +#include <linux/ioctl.h> + +/* + * int ioctl(int fd, unsigned long cmd, ... arg); + */ + +static __attribute__((unused)) +long _sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + return __nolibc_syscall3(__NR_ioctl, fd, cmd, arg); +} + +#define ioctl(fd, cmd, arg) __sysret(_sys_ioctl(fd, cmd, (unsigned long)(arg))) + +#endif /* _NOLIBC_SYS_IOCTL_H */ diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h new file mode 100644 index 000000000000..72bc1d43d1d4 --- /dev/null +++ b/tools/include/nolibc/sys/mman.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * mm definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_MMAN_H +#define _NOLIBC_SYS_MMAN_H + +#include "../arch.h" +#include "../sys.h" + +#ifndef _sys_mmap +static __attribute__((unused)) +void *_sys_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset) +{ + int n; + +#if defined(__NR_mmap2) + n = __NR_mmap2; + offset >>= 12; +#else + n = __NR_mmap; +#endif + + return (void *)(unsigned long)__nolibc_syscall6(n, addr, length, prot, flags, fd, offset); +} +#endif + +static __attribute__((unused)) +void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) +{ + void *ret = _sys_mmap(addr, length, prot, flags, fd, offset); + + if ((unsigned long)ret >= -4095UL) { + SET_ERRNO(-(long)ret); + ret = MAP_FAILED; + } + return ret; +} + +static __attribute__((unused)) +void *_sys_mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) +{ + return (void *)(unsigned long)__nolibc_syscall5(__NR_mremap, old_address, old_size, + new_size, flags, new_address); +} + +static __attribute__((unused)) +void *mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) +{ + void *ret = _sys_mremap(old_address, old_size, new_size, flags, new_address); + + if ((unsigned long)ret >= -4095UL) { + SET_ERRNO(-(long)ret); + ret = MAP_FAILED; + } + return ret; +} + +static __attribute__((unused)) +int _sys_munmap(void *addr, size_t length) +{ + return __nolibc_syscall2(__NR_munmap, addr, length); +} + +static __attribute__((unused)) +int munmap(void *addr, size_t length) +{ + return __sysret(_sys_munmap(addr, length)); +} + +#endif /* _NOLIBC_SYS_MMAN_H */ diff --git a/tools/include/nolibc/sys/mount.h b/tools/include/nolibc/sys/mount.h new file mode 100644 index 000000000000..8d3128ebc536 --- /dev/null +++ b/tools/include/nolibc/sys/mount.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Mount definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_MOUNT_H +#define _NOLIBC_SYS_MOUNT_H + +#include "../sys.h" + +#include <linux/mount.h> + +/* + * int mount(const char *source, const char *target, + * const char *fstype, unsigned long flags, + * const void *data); + */ +static __attribute__((unused)) +int _sys_mount(const char *src, const char *tgt, const char *fst, + unsigned long flags, const void *data) +{ + return __nolibc_syscall5(__NR_mount, src, tgt, fst, flags, data); +} + +static __attribute__((unused)) +int mount(const char *src, const char *tgt, + const char *fst, unsigned long flags, + const void *data) +{ + return __sysret(_sys_mount(src, tgt, fst, flags, data)); +} + +#endif /* _NOLIBC_SYS_MOUNT_H */ diff --git a/tools/include/nolibc/sys/prctl.h b/tools/include/nolibc/sys/prctl.h new file mode 100644 index 000000000000..3e11d3e5af12 --- /dev/null +++ b/tools/include/nolibc/sys/prctl.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Prctl definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_PRCTL_H +#define _NOLIBC_SYS_PRCTL_H + +#include "../sys.h" + +#include <linux/prctl.h> + +/* + * int prctl(int option, unsigned long arg2, unsigned long arg3, + * unsigned long arg4, unsigned long arg5); + */ + +static __attribute__((unused)) +int _sys_prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return __nolibc_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5); +} + +static __attribute__((unused)) +int prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return __sysret(_sys_prctl(option, arg2, arg3, arg4, arg5)); +} + +#endif /* _NOLIBC_SYS_PRCTL_H */ diff --git a/tools/include/nolibc/sys/ptrace.h b/tools/include/nolibc/sys/ptrace.h new file mode 100644 index 000000000000..4c4820f4f336 --- /dev/null +++ b/tools/include/nolibc/sys/ptrace.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * ptrace for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + * Copyright (C) 2025 Intel Corporation + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_PTRACE_H +#define _NOLIBC_SYS_PTRACE_H + +#include "../sys.h" + +#include <linux/ptrace.h> + +/* + * long ptrace(int op, pid_t pid, void *addr, void *data); + */ +static __attribute__((unused)) +long _sys_ptrace(int op, pid_t pid, void *addr, void *data) +{ + return __nolibc_syscall4(__NR_ptrace, op, pid, addr, data); +} + +static __attribute__((unused)) +ssize_t ptrace(int op, pid_t pid, void *addr, void *data) +{ + return __sysret(_sys_ptrace(op, pid, addr, data)); +} + +#endif /* _NOLIBC_SYS_PTRACE_H */ diff --git a/tools/include/nolibc/sys/random.h b/tools/include/nolibc/sys/random.h new file mode 100644 index 000000000000..6d055b00bd90 --- /dev/null +++ b/tools/include/nolibc/sys/random.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * random definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_RANDOM_H +#define _NOLIBC_SYS_RANDOM_H + +#include "../arch.h" +#include "../sys.h" + +#include <linux/random.h> + +/* + * ssize_t getrandom(void *buf, size_t buflen, unsigned int flags); + */ + +static __attribute__((unused)) +ssize_t _sys_getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return __nolibc_syscall3(__NR_getrandom, buf, buflen, flags); +} + +static __attribute__((unused)) +ssize_t getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return __sysret(_sys_getrandom(buf, buflen, flags)); +} + +#endif /* _NOLIBC_SYS_RANDOM_H */ diff --git a/tools/include/nolibc/sys/reboot.h b/tools/include/nolibc/sys/reboot.h new file mode 100644 index 000000000000..73cbca37c1f0 --- /dev/null +++ b/tools/include/nolibc/sys/reboot.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Reboot definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_REBOOT_H +#define _NOLIBC_SYS_REBOOT_H + +#include "../sys.h" + +#include <linux/reboot.h> + +/* + * int reboot(int cmd); + * <cmd> is among LINUX_REBOOT_CMD_* + */ + +static __attribute__((unused)) +ssize_t _sys_reboot(int magic1, int magic2, int cmd, void *arg) +{ + return __nolibc_syscall4(__NR_reboot, magic1, magic2, cmd, arg); +} + +static __attribute__((unused)) +int reboot(int cmd) +{ + return __sysret(_sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, NULL)); +} + +#endif /* _NOLIBC_SYS_REBOOT_H */ diff --git a/tools/include/nolibc/sys/resource.h b/tools/include/nolibc/sys/resource.h new file mode 100644 index 000000000000..f35de1c4e9ad --- /dev/null +++ b/tools/include/nolibc/sys/resource.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Resource definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_RESOURCE_H +#define _NOLIBC_SYS_RESOURCE_H + +#include "../sys.h" + +#include <linux/resource.h> + +/* + * int getrlimit(int resource, struct rlimit *rlim); + * int setrlimit(int resource, const struct rlimit *rlim); + */ + +static __attribute__((unused)) +int _sys_prlimit64(pid_t pid, int resource, + const struct rlimit64 *new_limit, struct rlimit64 *old_limit) +{ + return __nolibc_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit); +} + +static __attribute__((unused)) +int getrlimit(int resource, struct rlimit *rlim) +{ + struct rlimit64 rlim64; + int ret; + + ret = __sysret(_sys_prlimit64(0, resource, NULL, &rlim64)); + rlim->rlim_cur = rlim64.rlim_cur; + rlim->rlim_max = rlim64.rlim_max; + + return ret; +} + +static __attribute__((unused)) +int setrlimit(int resource, const struct rlimit *rlim) +{ + struct rlimit64 rlim64 = { + .rlim_cur = rlim->rlim_cur, + .rlim_max = rlim->rlim_max, + }; + + return __sysret(_sys_prlimit64(0, resource, &rlim64, NULL)); +} + +#endif /* _NOLIBC_SYS_RESOURCE_H */ diff --git a/tools/include/nolibc/sys/select.h b/tools/include/nolibc/sys/select.h new file mode 100644 index 000000000000..6d65d9ef3d6a --- /dev/null +++ b/tools/include/nolibc/sys/select.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ + +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_SELECT_H +#define _NOLIBC_SYS_SELECT_H + +#include <linux/time.h> +#include <linux/unistd.h> + +/* commonly an fd_set represents 256 FDs */ +#ifndef FD_SETSIZE +#define FD_SETSIZE 256 +#endif + +#define FD_SETIDXMASK (8 * sizeof(unsigned long)) +#define FD_SETBITMASK (8 * sizeof(unsigned long)-1) + +/* for select() */ +typedef struct { + unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK]; +} fd_set; + +#define FD_CLR(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] &= \ + ~(1U << (__fd & FD_SETBITMASK)); \ + } while (0) + +#define FD_SET(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] |= \ + 1 << (__fd & FD_SETBITMASK); \ + } while (0) + +#define FD_ISSET(fd, set) ({ \ + fd_set *__set = (set); \ + int __fd = (fd); \ + int __r = 0; \ + if (__fd >= 0) \ + __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ +1U << (__fd & FD_SETBITMASK)); \ + __r; \ + }) + +#define FD_ZERO(set) do { \ + fd_set *__set = (set); \ + int __idx; \ + int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\ + for (__idx = 0; __idx < __size; __idx++) \ + __set->fds[__idx] = 0; \ + } while (0) + +/* + * int select(int nfds, fd_set *read_fds, fd_set *write_fds, + * fd_set *except_fds, struct timeval *timeout); + */ + +static __attribute__((unused)) +int _sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ +#if defined(__NR_pselect6_time64) + struct __kernel_timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = (uint32_t)timeout->tv_usec * 1000; + } + return __nolibc_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, + timeout ? &t : NULL, NULL); +#else + struct __kernel_old_timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = (uint32_t)timeout->tv_usec * 1000; + } + return __nolibc_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, + timeout ? &t : NULL, NULL); +#endif +} + +static __attribute__((unused)) +int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ + return __sysret(_sys_select(nfds, rfds, wfds, efds, timeout)); +} + + +#endif /* _NOLIBC_SYS_SELECT_H */ diff --git a/tools/include/nolibc/sys/stat.h b/tools/include/nolibc/sys/stat.h new file mode 100644 index 000000000000..07ae715ff253 --- /dev/null +++ b/tools/include/nolibc/sys/stat.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * stat definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_STAT_H +#define _NOLIBC_SYS_STAT_H + +#include "../arch.h" +#include "../types.h" +#include "../sys.h" +#include "../sys/sysmacros.h" + +/* + * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); + * int stat(const char *path, struct stat *buf); + * int fstatat(int fd, const char *path, struct stat *buf, int flag); + * int fstat(int fildes, struct stat *buf); + * int lstat(const char *path, struct stat *buf); + */ + +static __attribute__((unused)) +int _sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +{ +#ifdef __NR_statx + return __nolibc_syscall5(__NR_statx, fd, path, flags, mask, buf); +#else + return __nolibc_enosys(__func__, fd, path, flags, mask, buf); +#endif +} + +static __attribute__((unused)) +int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +{ + return __sysret(_sys_statx(fd, path, flags, mask, buf)); +} + + +static __attribute__((unused)) +int fstatat(int fd, const char *path, struct stat *buf, int flag) +{ + struct statx statx; + long ret; + + ret = __sysret(_sys_statx(fd, path, flag | AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); + if (ret == -1) + return ret; + + buf->st_dev = makedev(statx.stx_dev_major, statx.stx_dev_minor); + buf->st_ino = statx.stx_ino; + buf->st_mode = statx.stx_mode; + buf->st_nlink = statx.stx_nlink; + buf->st_uid = statx.stx_uid; + buf->st_gid = statx.stx_gid; + buf->st_rdev = makedev(statx.stx_rdev_major, statx.stx_rdev_minor); + buf->st_size = statx.stx_size; + buf->st_blksize = statx.stx_blksize; + buf->st_blocks = statx.stx_blocks; + buf->st_atim.tv_sec = statx.stx_atime.tv_sec; + buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec; + buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec; + buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec; + buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec; + buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec; + + return 0; +} + +static __attribute__((unused)) +int stat(const char *path, struct stat *buf) +{ + return fstatat(AT_FDCWD, path, buf, 0); +} + +static __attribute__((unused)) +int fstat(int fildes, struct stat *buf) +{ + return fstatat(fildes, "", buf, AT_EMPTY_PATH); +} + +static __attribute__((unused)) +int lstat(const char *path, struct stat *buf) +{ + return fstatat(AT_FDCWD, path, buf, AT_SYMLINK_NOFOLLOW); +} + +#endif /* _NOLIBC_SYS_STAT_H */ diff --git a/tools/include/nolibc/sys/syscall.h b/tools/include/nolibc/sys/syscall.h new file mode 100644 index 000000000000..7f06314fcf0d --- /dev/null +++ b/tools/include/nolibc/sys/syscall.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * syscall() definition for NOLIBC + * Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_SYSCALL_H +#define _NOLIBC_SYS_SYSCALL_H + +#define ___nolibc_syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N +#define __nolibc_syscall_narg(...) ___nolibc_syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) +#define __nolibc_syscall(N, ...) __nolibc_syscall##N(__VA_ARGS__) +#define __nolibc_syscall_n(N, ...) __nolibc_syscall(N, __VA_ARGS__) +#define _syscall(...) __nolibc_syscall_n(__nolibc_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) +#define syscall(...) __sysret(_syscall(__VA_ARGS__)) + +#endif /* _NOLIBC_SYS_SYSCALL_H */ diff --git a/tools/include/nolibc/sys/sysmacros.h b/tools/include/nolibc/sys/sysmacros.h new file mode 100644 index 000000000000..ff9dd85ca59c --- /dev/null +++ b/tools/include/nolibc/sys/sysmacros.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Sysmacro definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_SYSMACROS_H +#define _NOLIBC_SYS_SYSMACROS_H + +#include "../std.h" + +static __inline__ dev_t __nolibc_makedev(unsigned int maj, unsigned int min) +{ + return (((dev_t)maj & ~0xfff) << 32) | ((maj & 0xfff) << 8) | + (((dev_t)min & ~0xff) << 12) | (min & 0xff); +} + +#define makedev(maj, min) __nolibc_makedev(maj, min) + +static __inline__ unsigned int __nolibc_major(dev_t dev) +{ + return ((dev >> 32) & ~0xfff) | ((dev >> 8) & 0xfff); +} + +#define major(dev) __nolibc_major(dev) + +static __inline__ unsigned int __nolibc_minor(dev_t dev) +{ + return ((dev >> 12) & ~0xff) | (dev & 0xff); +} + +#define minor(dev) __nolibc_minor(dev) + +#endif /* _NOLIBC_SYS_SYSMACROS_H */ diff --git a/tools/include/nolibc/sys/time.h b/tools/include/nolibc/sys/time.h new file mode 100644 index 000000000000..c3bb47f69f06 --- /dev/null +++ b/tools/include/nolibc/sys/time.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * time definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_TIME_H +#define _NOLIBC_SYS_TIME_H + +#include "../arch.h" +#include "../sys.h" + +static int _sys_clock_gettime(clockid_t clockid, struct timespec *tp); + +/* + * int gettimeofday(struct timeval *tv, struct timezone *tz); + */ + +static __attribute__((unused)) +int _sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + (void) tz; /* Non-NULL tz is undefined behaviour */ + + struct timespec tp; + int ret; + + ret = _sys_clock_gettime(CLOCK_REALTIME, &tp); + if (!ret && tv) { + tv->tv_sec = tp.tv_sec; + tv->tv_usec = (uint32_t)tp.tv_nsec / 1000; + } + + return ret; +} + +static __attribute__((unused)) +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return __sysret(_sys_gettimeofday(tv, tz)); +} + +#endif /* _NOLIBC_SYS_TIME_H */ diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h new file mode 100644 index 000000000000..cb7fef37a7a4 --- /dev/null +++ b/tools/include/nolibc/sys/timerfd.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * timerfd definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_TIMERFD_H +#define _NOLIBC_SYS_TIMERFD_H + +#include "../sys.h" +#include "../time.h" + +#include <linux/timerfd.h> + + +static __attribute__((unused)) +int _sys_timerfd_create(int clockid, int flags) +{ + return __nolibc_syscall2(__NR_timerfd_create, clockid, flags); +} + +static __attribute__((unused)) +int timerfd_create(int clockid, int flags) +{ + return __sysret(_sys_timerfd_create(clockid, flags)); +} + + +static __attribute__((unused)) +int _sys_timerfd_gettime(int fd, struct itimerspec *curr_value) +{ +#if defined(__NR_timerfd_gettime64) + __nolibc_assert_time64_type(curr_value->it_value.tv_sec); + return __nolibc_syscall2(__NR_timerfd_gettime64, fd, curr_value); +#else + __nolibc_assert_native_time64(); + return __nolibc_syscall2(__NR_timerfd_gettime, fd, curr_value); +#endif +} + +static __attribute__((unused)) +int timerfd_gettime(int fd, struct itimerspec *curr_value) +{ + return __sysret(_sys_timerfd_gettime(fd, curr_value)); +} + + +static __attribute__((unused)) +int _sys_timerfd_settime(int fd, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ +#if defined(__NR_timerfd_settime64) + __nolibc_assert_time64_type(new_value->it_value.tv_sec); + return __nolibc_syscall4(__NR_timerfd_settime64, fd, flags, new_value, old_value); +#else + __nolibc_assert_native_time64(); + return __nolibc_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value); +#endif +} + +static __attribute__((unused)) +int timerfd_settime(int fd, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ + return __sysret(_sys_timerfd_settime(fd, flags, new_value, old_value)); +} + +#endif /* _NOLIBC_SYS_TIMERFD_H */ diff --git a/tools/include/nolibc/sys/types.h b/tools/include/nolibc/sys/types.h new file mode 100644 index 000000000000..8a264a13275c --- /dev/null +++ b/tools/include/nolibc/sys/types.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * sys/types.h shim for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +#include "../types.h" diff --git a/tools/include/nolibc/sys/uio.h b/tools/include/nolibc/sys/uio.h new file mode 100644 index 000000000000..06bf17ddd5d2 --- /dev/null +++ b/tools/include/nolibc/sys/uio.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * uio for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + * Copyright (C) 2025 Intel Corporation + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_UIO_H +#define _NOLIBC_SYS_UIO_H + +#include "../sys.h" +#include <linux/uio.h> + + +/* + * ssize_t readv(int fd, const struct iovec *iovec, int count); + */ +static __attribute__((unused)) +ssize_t _sys_readv(int fd, const struct iovec *iovec, int count) +{ + return __nolibc_syscall3(__NR_readv, fd, iovec, count); +} + +static __attribute__((unused)) +ssize_t readv(int fd, const struct iovec *iovec, int count) +{ + return __sysret(_sys_readv(fd, iovec, count)); +} + +/* + * ssize_t writev(int fd, const struct iovec *iovec, int count); + */ +static __attribute__((unused)) +ssize_t _sys_writev(int fd, const struct iovec *iovec, int count) +{ + return __nolibc_syscall3(__NR_writev, fd, iovec, count); +} + +static __attribute__((unused)) +ssize_t writev(int fd, const struct iovec *iovec, int count) +{ + return __sysret(_sys_writev(fd, iovec, count)); +} + + +#endif /* _NOLIBC_SYS_UIO_H */ diff --git a/tools/include/nolibc/sys/utsname.h b/tools/include/nolibc/sys/utsname.h new file mode 100644 index 000000000000..2aaf873b7985 --- /dev/null +++ b/tools/include/nolibc/sys/utsname.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Utsname definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_UTSNAME_H +#define _NOLIBC_SYS_UTSNAME_H + +#include "../sys.h" + +#include <linux/utsname.h> + +/* + * int uname(struct utsname *buf); + */ + +struct utsname { + char sysname[65]; + char nodename[65]; + char release[65]; + char version[65]; + char machine[65]; + char domainname[65]; +}; + +static __attribute__((unused)) +int _sys_uname(struct utsname *buf) +{ + return __nolibc_syscall1(__NR_uname, buf); +} + +static __attribute__((unused)) +int uname(struct utsname *buf) +{ + return __sysret(_sys_uname(buf)); +} + +#endif /* _NOLIBC_SYS_UTSNAME_H */ diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h new file mode 100644 index 000000000000..7a1feb2b66fc --- /dev/null +++ b/tools/include/nolibc/sys/wait.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * wait definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_WAIT_H +#define _NOLIBC_SYS_WAIT_H + +#include "../arch.h" +#include "../std.h" +#include "../types.h" + +/* + * pid_t wait(int *status); + * pid_t waitpid(pid_t pid, int *status, int options); + * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); + */ + +static __attribute__((unused)) +int _sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) +{ + return __nolibc_syscall5(__NR_waitid, which, pid, infop, options, rusage); +} + +static __attribute__((unused)) +int waitid(int which, pid_t pid, siginfo_t *infop, int options) +{ + return __sysret(_sys_waitid(which, pid, infop, options, NULL)); +} + + +static __attribute__((unused)) +pid_t waitpid(pid_t pid, int *status, int options) +{ + int idtype, ret; + siginfo_t info; + pid_t id; + + if (pid == INT_MIN) { + SET_ERRNO(ESRCH); + return -1; + } else if (pid < -1) { + idtype = P_PGID; + id = -pid; + } else if (pid == -1) { + idtype = P_ALL; + id = 0; + } else if (pid == 0) { + idtype = P_PGID; + id = 0; + } else { + idtype = P_PID; + id = pid; + } + + options |= WEXITED; + + ret = waitid(idtype, id, &info, options); + if (ret) + return -1; + + switch (info.si_code) { + case 0: + if (status) + *status = 0; + break; + case CLD_EXITED: + if (status) + *status = (info.si_status & 0xff) << 8; + break; + case CLD_KILLED: + if (status) + *status = info.si_status & 0x7f; + break; + case CLD_DUMPED: + if (status) + *status = (info.si_status & 0x7f) | 0x80; + break; + case CLD_STOPPED: + case CLD_TRAPPED: + if (status) + *status = (info.si_status << 8) + 0x7f; + break; + case CLD_CONTINUED: + if (status) + *status = 0xffff; + break; + default: + return -1; + } + + return info.si_pid; +} + +static __attribute__((unused)) +pid_t wait(int *status) +{ + return waitpid(-1, status, 0); +} + +#endif /* _NOLIBC_SYS_WAIT_H */ diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index 84655361b9ad..9ba930710ff9 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_TIME_H #define _NOLIBC_TIME_H @@ -12,20 +15,191 @@ #include "types.h" #include "sys.h" +#include <linux/signal.h> +#include <linux/time.h> + +#define __nolibc_assert_time64_type(t) \ + __nolibc_static_assert(sizeof(t) == 8) + +#define __nolibc_assert_native_time64() \ + __nolibc_assert_time64_type(__kernel_old_time_t) + +/* + * int clock_getres(clockid_t clockid, struct timespec *res); + * int clock_gettime(clockid_t clockid, struct timespec *tp); + * int clock_settime(clockid_t clockid, const struct timespec *tp); + * int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + * struct timespec *rmtp) + */ + +static __attribute__((unused)) +int _sys_clock_getres(clockid_t clockid, struct timespec *res) +{ +#if defined(__NR_clock_getres_time64) + __nolibc_assert_time64_type(res->tv_sec); + return __nolibc_syscall2(__NR_clock_getres_time64, clockid, res); +#else + __nolibc_assert_native_time64(); + return __nolibc_syscall2(__NR_clock_getres, clockid, res); +#endif +} + +static __attribute__((unused)) +int clock_getres(clockid_t clockid, struct timespec *res) +{ + return __sysret(_sys_clock_getres(clockid, res)); +} + +static __attribute__((unused)) +int _sys_clock_gettime(clockid_t clockid, struct timespec *tp) +{ +#if defined(__NR_clock_gettime64) + __nolibc_assert_time64_type(tp->tv_sec); + return __nolibc_syscall2(__NR_clock_gettime64, clockid, tp); +#else + __nolibc_assert_native_time64(); + return __nolibc_syscall2(__NR_clock_gettime, clockid, tp); +#endif +} + +static __attribute__((unused)) +int clock_gettime(clockid_t clockid, struct timespec *tp) +{ + return __sysret(_sys_clock_gettime(clockid, tp)); +} + +static __attribute__((unused)) +int _sys_clock_settime(clockid_t clockid, struct timespec *tp) +{ +#if defined(__NR_clock_settime64) + __nolibc_assert_time64_type(tp->tv_sec); + return __nolibc_syscall2(__NR_clock_settime64, clockid, tp); +#else + __nolibc_assert_native_time64(); + return __nolibc_syscall2(__NR_clock_settime, clockid, tp); +#endif +} + +static __attribute__((unused)) +int clock_settime(clockid_t clockid, struct timespec *tp) +{ + return __sysret(_sys_clock_settime(clockid, tp)); +} + +static __attribute__((unused)) +int _sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + struct timespec *rmtp) +{ +#if defined(__NR_clock_nanosleep_time64) + __nolibc_assert_time64_type(rqtp->tv_sec); + return __nolibc_syscall4(__NR_clock_nanosleep_time64, clockid, flags, rqtp, rmtp); +#else + __nolibc_assert_native_time64(); + return __nolibc_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp); +#endif +} + +static __attribute__((unused)) +int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + struct timespec *rmtp) +{ + /* Directly return a positive error number */ + return -_sys_clock_nanosleep(clockid, flags, rqtp, rmtp); +} + +static __inline__ +double difftime(time_t time1, time_t time2) +{ + return time1 - time2; +} + +static __inline__ +int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) +{ + return __sysret(_sys_clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp)); +} + + static __attribute__((unused)) time_t time(time_t *tptr) { struct timeval tv; /* note, cannot fail here */ - sys_gettimeofday(&tv, NULL); + _sys_gettimeofday(&tv, NULL); if (tptr) *tptr = tv.tv_sec; return tv.tv_sec; } -/* make sure to include all global symbols */ -#include "nolibc.h" + +/* + * int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid); + * int timer_gettime(timer_t timerid, struct itimerspec *curr_value); + * int timer_settime(timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value); + */ + +static __attribute__((unused)) +int _sys_timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid) +{ + return __nolibc_syscall3(__NR_timer_create, clockid, evp, timerid); +} + +static __attribute__((unused)) +int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid) +{ + return __sysret(_sys_timer_create(clockid, evp, timerid)); +} + +static __attribute__((unused)) +int _sys_timer_delete(timer_t timerid) +{ + return __nolibc_syscall1(__NR_timer_delete, timerid); +} + +static __attribute__((unused)) +int timer_delete(timer_t timerid) +{ + return __sysret(_sys_timer_delete(timerid)); +} + +static __attribute__((unused)) +int _sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value) +{ +#if defined(__NR_timer_gettime64) + __nolibc_assert_time64_type(curr_value->it_value.tv_sec); + return __nolibc_syscall2(__NR_timer_gettime64, timerid, curr_value); +#else + __nolibc_assert_native_time64(); + return __nolibc_syscall2(__NR_timer_gettime, timerid, curr_value); +#endif +} + +static __attribute__((unused)) +int timer_gettime(timer_t timerid, struct itimerspec *curr_value) +{ + return __sysret(_sys_timer_gettime(timerid, curr_value)); +} + +static __attribute__((unused)) +int _sys_timer_settime(timer_t timerid, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ +#if defined(__NR_timer_settime64) + __nolibc_assert_time64_type(new_value->it_value.tv_sec); + return __nolibc_syscall4(__NR_timer_settime64, timerid, flags, new_value, old_value); +#else + __nolibc_assert_native_time64(); + return __nolibc_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value); +#endif +} + +static __attribute__((unused)) +int timer_settime(timer_t timerid, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ + return __sysret(_sys_timer_settime(timerid, flags, new_value, old_value)); +} #endif /* _NOLIBC_TIME_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index b26a5d0c417c..8f3cb18df7f1 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -4,17 +4,33 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_TYPES_H #define _NOLIBC_TYPES_H #include "std.h" #include <linux/mman.h> -#include <linux/reboot.h> /* for LINUX_REBOOT_* */ #include <linux/stat.h> -#include <linux/time.h> +#include <linux/time_types.h> #include <linux/wait.h> -#include <linux/resource.h> +struct timespec { + time_t tv_sec; + int64_t tv_nsec; +}; +#define _STRUCT_TIMESPEC + +/* Never use with system calls */ +struct timeval { + time_t tv_sec; + int64_t tv_usec; +}; + +#define timeval __nolibc_kernel_timeval +#include <linux/time.h> +#undef timeval /* Only the generic macros and types may be defined here. The arch-specific * ones such as the O_RDONLY and related macros used by fcntl() and open() @@ -69,11 +85,6 @@ #define DT_LNK 0xa #define DT_SOCK 0xc -/* commonly an fd_set represents 256 FDs */ -#ifndef FD_SETSIZE -#define FD_SETSIZE 256 -#endif - /* PATH_MAX and MAXPATHLEN are often used and found with plenty of different * values. */ @@ -114,62 +125,6 @@ #define EXIT_SUCCESS 0 #define EXIT_FAILURE 1 -#define FD_SETIDXMASK (8 * sizeof(unsigned long)) -#define FD_SETBITMASK (8 * sizeof(unsigned long)-1) - -/* for select() */ -typedef struct { - unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK]; -} fd_set; - -#define FD_CLR(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fds[__fd / FD_SETIDXMASK] &= \ - ~(1U << (__fd & FX_SETBITMASK)); \ - } while (0) - -#define FD_SET(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fds[__fd / FD_SETIDXMASK] |= \ - 1 << (__fd & FD_SETBITMASK); \ - } while (0) - -#define FD_ISSET(fd, set) ({ \ - fd_set *__set = (set); \ - int __fd = (fd); \ - int __r = 0; \ - if (__fd >= 0) \ - __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ -1U << (__fd & FD_SET_BITMASK)); \ - __r; \ - }) - -#define FD_ZERO(set) do { \ - fd_set *__set = (set); \ - int __idx; \ - int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\ - for (__idx = 0; __idx < __size; __idx++) \ - __set->fds[__idx] = 0; \ - } while (0) - -/* for poll() */ -#define POLLIN 0x0001 -#define POLLPRI 0x0002 -#define POLLOUT 0x0004 -#define POLLERR 0x0008 -#define POLLHUP 0x0010 -#define POLLNVAL 0x0020 - -struct pollfd { - int fd; - short int events; - short int revents; -}; - /* for getdents64() */ struct linux_dirent64 { uint64_t d_ino; @@ -198,14 +153,8 @@ struct stat { union { time_t st_ctime; struct timespec st_ctim; }; /* time of last status change */ }; -/* WARNING, it only deals with the 4096 first majors and 256 first minors */ -#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) -#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) -#define minor(dev) ((unsigned int)(((dev) & 0xff)) - -#ifndef offsetof -#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) -#endif +typedef __kernel_clockid_t clockid_t; +typedef int timer_t; #ifndef container_of #define container_of(PTR, TYPE, FIELD) ({ \ @@ -214,7 +163,4 @@ struct stat { }) #endif -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_TYPES_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index e38f3660c051..79599ceef45d 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_UNISTD_H #define _NOLIBC_UNISTD_H @@ -17,13 +20,65 @@ #define STDOUT_FILENO 1 #define STDERR_FILENO 2 +#define F_OK 0 +#define X_OK 1 +#define W_OK 2 +#define R_OK 4 + +/* + * int access(const char *path, int amode); + * int faccessat(int fd, const char *path, int amode, int flag); + */ + +static __attribute__((unused)) +int _sys_faccessat(int fd, const char *path, int amode, int flag) +{ + return __nolibc_syscall4(__NR_faccessat, fd, path, amode, flag); +} + +static __attribute__((unused)) +int faccessat(int fd, const char *path, int amode, int flag) +{ + return __sysret(_sys_faccessat(fd, path, amode, flag)); +} + +static __attribute__((unused)) +int access(const char *path, int amode) +{ + return faccessat(AT_FDCWD, path, amode, 0); +} + +#if !defined(_sys_ftruncate64) && defined(__NR_ftruncate64) +static __attribute__((unused)) +int _sys_ftruncate64(int fd, uint32_t length0, uint32_t length1) +{ + return __nolibc_syscall3(__NR_ftruncate64, fd, length0, length1); +} +#define _sys_ftruncate64 _sys_ftruncate64 +#endif + +static __attribute__((unused)) +int _sys_ftruncate(int fd, off_t length) +{ +#if defined(_sys_ftruncate64) + return _sys_ftruncate64(fd, __NOLIBC_LLARGPART(length, 0), __NOLIBC_LLARGPART(length, 1)); +#else + return __nolibc_syscall2(__NR_ftruncate, fd, length); +#endif +} + +static __attribute__((unused)) +int ftruncate(int fd, off_t length) +{ + return __sysret(_sys_ftruncate(fd, length)); +} static __attribute__((unused)) int msleep(unsigned int msecs) { struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 }; - if (sys_select(0, 0, 0, 0, &my_timeval) < 0) + if (_sys_select(0, NULL, NULL, NULL, &my_timeval) < 0) return (my_timeval.tv_sec * 1000) + (my_timeval.tv_usec / 1000) + !!(my_timeval.tv_usec % 1000); @@ -36,7 +91,7 @@ unsigned int sleep(unsigned int seconds) { struct timeval my_timeval = { seconds, 0 }; - if (sys_select(0, 0, 0, 0, &my_timeval) < 0) + if (_sys_select(0, NULL, NULL, NULL, &my_timeval) < 0) return my_timeval.tv_sec + !!my_timeval.tv_usec; else return 0; @@ -47,7 +102,7 @@ int usleep(unsigned int usecs) { struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 }; - return sys_select(0, 0, 0, 0, &my_timeval); + return _sys_select(0, NULL, NULL, NULL, &my_timeval); } static __attribute__((unused)) @@ -56,13 +111,4 @@ int tcsetpgrp(int fd, pid_t pid) return ioctl(fd, TIOCSPGRP, &pid); } -#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N -#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) -#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__)) -#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__) -#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) - -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_UNISTD_H */ diff --git a/tools/include/uapi/asm-generic/errno.h b/tools/include/uapi/asm-generic/errno.h index cf9c51ac49f9..bd78e69e0a43 100644 --- a/tools/include/uapi/asm-generic/errno.h +++ b/tools/include/uapi/asm-generic/errno.h @@ -55,6 +55,7 @@ #define EMULTIHOP 72 /* Multihop attempted */ #define EDOTDOT 73 /* RFS specific error */ #define EBADMSG 74 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EOVERFLOW 75 /* Value too large for defined data type */ #define ENOTUNIQ 76 /* Name not unique on network */ #define EBADFD 77 /* File descriptor in bad state */ @@ -98,6 +99,7 @@ #define EINPROGRESS 115 /* Operation now in progress */ #define ESTALE 116 /* Stale file handle */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ @@ -120,4 +122,6 @@ #define EHWPOISON 133 /* Memory page has hardware error */ +#define EFTYPE 134 /* Wrong file type for the intended operation */ + #endif diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 1ea2c4c33b86..ef1c27fa3c57 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -85,6 +85,7 @@ /* compatibility flags */ #define MAP_FILE 0 +#define PKEY_UNRESTRICTED 0x0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index ffff554a5230..f333a0ac4ee4 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -119,14 +119,33 @@ #define SO_DETACH_REUSEPORT_BPF 68 +#define SO_PREFER_BUSY_POLL 69 +#define SO_BUSY_POLL_BUDGET 70 + +#define SO_NETNS_COOKIE 71 + +#define SO_BUF_LOCK 72 + +#define SO_RESERVE_MEM 73 + +#define SO_TXREHASH 74 + #define SO_RCVMARK 75 #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 -#define SCM_TS_OPT_ID 78 +#define SO_DEVMEM_LINEAR 78 +#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR +#define SO_DEVMEM_DMABUF 79 +#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF +#define SO_DEVMEM_DONTNEED 80 + +#define SCM_TS_OPT_ID 81 + +#define SO_RCVPRIORITY 82 -#define SO_RCVPRIORITY 79 +#define SO_PASSRIGHTS 83 #if !defined(__KERNEL__) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 88dc393c2bca..a627acc8fb5f 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -849,9 +849,22 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat) __SYSCALL(__NR_listxattrat, sys_listxattrat) #define __NR_removexattrat 466 __SYSCALL(__NR_removexattrat, sys_removexattrat) +#define __NR_open_tree_attr 467 +__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) + +/* fs/inode.c */ +#define __NR_file_getattr 468 +__SYSCALL(__NR_file_getattr, sys_file_getattr) +#define __NR_file_setattr 469 +__SYSCALL(__NR_file_setattr, sys_file_setattr) +#define __NR_listns 470 +__SYSCALL(__NR_listns, sys_listns) + +#define __NR_rseq_slice_yield 471 +__SYSCALL(__NR_rseq_slice_yield, sys_rseq_slice_yield) #undef __NR_syscalls -#define __NR_syscalls 467 +#define __NR_syscalls 472 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h deleted file mode 100644 index 7fba37b94401..000000000000 --- a/tools/include/uapi/drm/drm.h +++ /dev/null @@ -1,1433 +0,0 @@ -/* - * Header for the Direct Rendering Manager - * - * Author: Rickard E. (Rik) Faith <faith@valinux.com> - * - * Acknowledgments: - * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic cmpxchg. - */ - -/* - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef _DRM_H_ -#define _DRM_H_ - -#if defined(__KERNEL__) - -#include <linux/types.h> -#include <asm/ioctl.h> -typedef unsigned int drm_handle_t; - -#elif defined(__linux__) - -#include <linux/types.h> -#include <asm/ioctl.h> -typedef unsigned int drm_handle_t; - -#else /* One of the BSDs */ - -#include <stdint.h> -#include <sys/ioccom.h> -#include <sys/types.h> -typedef int8_t __s8; -typedef uint8_t __u8; -typedef int16_t __s16; -typedef uint16_t __u16; -typedef int32_t __s32; -typedef uint32_t __u32; -typedef int64_t __s64; -typedef uint64_t __u64; -typedef size_t __kernel_size_t; -typedef unsigned long drm_handle_t; - -#endif - -#if defined(__cplusplus) -extern "C" { -#endif - -#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */ -#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */ -#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */ -#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? */ - -#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */ -#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */ -#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD) -#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT) -#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT)) - -typedef unsigned int drm_context_t; -typedef unsigned int drm_drawable_t; -typedef unsigned int drm_magic_t; - -/* - * Cliprect. - * - * \warning: If you change this structure, make sure you change - * XF86DRIClipRectRec in the server as well - * - * \note KW: Actually it's illegal to change either for - * backwards-compatibility reasons. - */ -struct drm_clip_rect { - unsigned short x1; - unsigned short y1; - unsigned short x2; - unsigned short y2; -}; - -/* - * Drawable information. - */ -struct drm_drawable_info { - unsigned int num_rects; - struct drm_clip_rect *rects; -}; - -/* - * Texture region, - */ -struct drm_tex_region { - unsigned char next; - unsigned char prev; - unsigned char in_use; - unsigned char padding; - unsigned int age; -}; - -/* - * Hardware lock. - * - * The lock structure is a simple cache-line aligned integer. To avoid - * processor bus contention on a multiprocessor system, there should not be any - * other data stored in the same cache line. - */ -struct drm_hw_lock { - __volatile__ unsigned int lock; /**< lock variable */ - char padding[60]; /**< Pad to cache line */ -}; - -/* - * DRM_IOCTL_VERSION ioctl argument type. - * - * \sa drmGetVersion(). - */ -struct drm_version { - int version_major; /**< Major version */ - int version_minor; /**< Minor version */ - int version_patchlevel; /**< Patch level */ - __kernel_size_t name_len; /**< Length of name buffer */ - char __user *name; /**< Name of driver */ - __kernel_size_t date_len; /**< Length of date buffer */ - char __user *date; /**< User-space buffer to hold date */ - __kernel_size_t desc_len; /**< Length of desc buffer */ - char __user *desc; /**< User-space buffer to hold desc */ -}; - -/* - * DRM_IOCTL_GET_UNIQUE ioctl argument type. - * - * \sa drmGetBusid() and drmSetBusId(). - */ -struct drm_unique { - __kernel_size_t unique_len; /**< Length of unique */ - char __user *unique; /**< Unique name for driver instantiation */ -}; - -struct drm_list { - int count; /**< Length of user-space structures */ - struct drm_version __user *version; -}; - -struct drm_block { - int unused; -}; - -/* - * DRM_IOCTL_CONTROL ioctl argument type. - * - * \sa drmCtlInstHandler() and drmCtlUninstHandler(). - */ -struct drm_control { - enum { - DRM_ADD_COMMAND, - DRM_RM_COMMAND, - DRM_INST_HANDLER, - DRM_UNINST_HANDLER - } func; - int irq; -}; - -/* - * Type of memory to map. - */ -enum drm_map_type { - _DRM_FRAME_BUFFER = 0, /**< WC (no caching), no core dump */ - _DRM_REGISTERS = 1, /**< no caching, no core dump */ - _DRM_SHM = 2, /**< shared, cached */ - _DRM_AGP = 3, /**< AGP/GART */ - _DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */ - _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ -}; - -/* - * Memory mapping flags. - */ -enum drm_map_flags { - _DRM_RESTRICTED = 0x01, /**< Cannot be mapped to user-virtual */ - _DRM_READ_ONLY = 0x02, - _DRM_LOCKED = 0x04, /**< shared, cached, locked */ - _DRM_KERNEL = 0x08, /**< kernel requires access */ - _DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */ - _DRM_CONTAINS_LOCK = 0x20, /**< SHM page that contains lock */ - _DRM_REMOVABLE = 0x40, /**< Removable mapping */ - _DRM_DRIVER = 0x80 /**< Managed by driver */ -}; - -struct drm_ctx_priv_map { - unsigned int ctx_id; /**< Context requesting private mapping */ - void *handle; /**< Handle of map */ -}; - -/* - * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls - * argument type. - * - * \sa drmAddMap(). - */ -struct drm_map { - unsigned long offset; /**< Requested physical address (0 for SAREA)*/ - unsigned long size; /**< Requested physical size (bytes) */ - enum drm_map_type type; /**< Type of memory to map */ - enum drm_map_flags flags; /**< Flags */ - void *handle; /**< User-space: "Handle" to pass to mmap() */ - /**< Kernel-space: kernel-virtual address */ - int mtrr; /**< MTRR slot used */ - /* Private data */ -}; - -/* - * DRM_IOCTL_GET_CLIENT ioctl argument type. - */ -struct drm_client { - int idx; /**< Which client desired? */ - int auth; /**< Is client authenticated? */ - unsigned long pid; /**< Process ID */ - unsigned long uid; /**< User ID */ - unsigned long magic; /**< Magic */ - unsigned long iocs; /**< Ioctl count */ -}; - -enum drm_stat_type { - _DRM_STAT_LOCK, - _DRM_STAT_OPENS, - _DRM_STAT_CLOSES, - _DRM_STAT_IOCTLS, - _DRM_STAT_LOCKS, - _DRM_STAT_UNLOCKS, - _DRM_STAT_VALUE, /**< Generic value */ - _DRM_STAT_BYTE, /**< Generic byte counter (1024bytes/K) */ - _DRM_STAT_COUNT, /**< Generic non-byte counter (1000/k) */ - - _DRM_STAT_IRQ, /**< IRQ */ - _DRM_STAT_PRIMARY, /**< Primary DMA bytes */ - _DRM_STAT_SECONDARY, /**< Secondary DMA bytes */ - _DRM_STAT_DMA, /**< DMA */ - _DRM_STAT_SPECIAL, /**< Special DMA (e.g., priority or polled) */ - _DRM_STAT_MISSED /**< Missed DMA opportunity */ - /* Add to the *END* of the list */ -}; - -/* - * DRM_IOCTL_GET_STATS ioctl argument type. - */ -struct drm_stats { - unsigned long count; - struct { - unsigned long value; - enum drm_stat_type type; - } data[15]; -}; - -/* - * Hardware locking flags. - */ -enum drm_lock_flags { - _DRM_LOCK_READY = 0x01, /**< Wait until hardware is ready for DMA */ - _DRM_LOCK_QUIESCENT = 0x02, /**< Wait until hardware quiescent */ - _DRM_LOCK_FLUSH = 0x04, /**< Flush this context's DMA queue first */ - _DRM_LOCK_FLUSH_ALL = 0x08, /**< Flush all DMA queues first */ - /* These *HALT* flags aren't supported yet - -- they will be used to support the - full-screen DGA-like mode. */ - _DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */ - _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ -}; - -/* - * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. - * - * \sa drmGetLock() and drmUnlock(). - */ -struct drm_lock { - int context; - enum drm_lock_flags flags; -}; - -/* - * DMA flags - * - * \warning - * These values \e must match xf86drm.h. - * - * \sa drm_dma. - */ -enum drm_dma_flags { - /* Flags for DMA buffer dispatch */ - _DRM_DMA_BLOCK = 0x01, /**< - * Block until buffer dispatched. - * - * \note The buffer may not yet have - * been processed by the hardware -- - * getting a hardware lock with the - * hardware quiescent will ensure - * that the buffer has been - * processed. - */ - _DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */ - _DRM_DMA_PRIORITY = 0x04, /**< High priority dispatch */ - - /* Flags for DMA buffer request */ - _DRM_DMA_WAIT = 0x10, /**< Wait for free buffers */ - _DRM_DMA_SMALLER_OK = 0x20, /**< Smaller-than-requested buffers OK */ - _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ -}; - -/* - * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. - * - * \sa drmAddBufs(). - */ -struct drm_buf_desc { - int count; /**< Number of buffers of this size */ - int size; /**< Size in bytes */ - int low_mark; /**< Low water mark */ - int high_mark; /**< High water mark */ - enum { - _DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */ - _DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */ - _DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */ - _DRM_FB_BUFFER = 0x08, /**< Buffer is in frame buffer */ - _DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */ - } flags; - unsigned long agp_start; /**< - * Start address of where the AGP buffers are - * in the AGP aperture - */ -}; - -/* - * DRM_IOCTL_INFO_BUFS ioctl argument type. - */ -struct drm_buf_info { - int count; /**< Entries in list */ - struct drm_buf_desc __user *list; -}; - -/* - * DRM_IOCTL_FREE_BUFS ioctl argument type. - */ -struct drm_buf_free { - int count; - int __user *list; -}; - -/* - * Buffer information - * - * \sa drm_buf_map. - */ -struct drm_buf_pub { - int idx; /**< Index into the master buffer list */ - int total; /**< Buffer size */ - int used; /**< Amount of buffer in use (for DMA) */ - void __user *address; /**< Address of buffer */ -}; - -/* - * DRM_IOCTL_MAP_BUFS ioctl argument type. - */ -struct drm_buf_map { - int count; /**< Length of the buffer list */ -#ifdef __cplusplus - void __user *virt; -#else - void __user *virtual; /**< Mmap'd area in user-virtual */ -#endif - struct drm_buf_pub __user *list; /**< Buffer information */ -}; - -/* - * DRM_IOCTL_DMA ioctl argument type. - * - * Indices here refer to the offset into the buffer list in drm_buf_get. - * - * \sa drmDMA(). - */ -struct drm_dma { - int context; /**< Context handle */ - int send_count; /**< Number of buffers to send */ - int __user *send_indices; /**< List of handles to buffers */ - int __user *send_sizes; /**< Lengths of data to send */ - enum drm_dma_flags flags; /**< Flags */ - int request_count; /**< Number of buffers requested */ - int request_size; /**< Desired size for buffers */ - int __user *request_indices; /**< Buffer information */ - int __user *request_sizes; - int granted_count; /**< Number of buffers granted */ -}; - -enum drm_ctx_flags { - _DRM_CONTEXT_PRESERVED = 0x01, - _DRM_CONTEXT_2DONLY = 0x02 -}; - -/* - * DRM_IOCTL_ADD_CTX ioctl argument type. - * - * \sa drmCreateContext() and drmDestroyContext(). - */ -struct drm_ctx { - drm_context_t handle; - enum drm_ctx_flags flags; -}; - -/* - * DRM_IOCTL_RES_CTX ioctl argument type. - */ -struct drm_ctx_res { - int count; - struct drm_ctx __user *contexts; -}; - -/* - * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. - */ -struct drm_draw { - drm_drawable_t handle; -}; - -/* - * DRM_IOCTL_UPDATE_DRAW ioctl argument type. - */ -typedef enum { - DRM_DRAWABLE_CLIPRECTS -} drm_drawable_info_type_t; - -struct drm_update_draw { - drm_drawable_t handle; - unsigned int type; - unsigned int num; - unsigned long long data; -}; - -/* - * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. - */ -struct drm_auth { - drm_magic_t magic; -}; - -/* - * DRM_IOCTL_IRQ_BUSID ioctl argument type. - * - * \sa drmGetInterruptFromBusID(). - */ -struct drm_irq_busid { - int irq; /**< IRQ number */ - int busnum; /**< bus number */ - int devnum; /**< device number */ - int funcnum; /**< function number */ -}; - -enum drm_vblank_seq_type { - _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */ - _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */ - /* bits 1-6 are reserved for high crtcs */ - _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e, - _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */ - _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */ - _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */ - _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */ - _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */ -}; -#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1 - -#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) -#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \ - _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS) - -struct drm_wait_vblank_request { - enum drm_vblank_seq_type type; - unsigned int sequence; - unsigned long signal; -}; - -struct drm_wait_vblank_reply { - enum drm_vblank_seq_type type; - unsigned int sequence; - long tval_sec; - long tval_usec; -}; - -/* - * DRM_IOCTL_WAIT_VBLANK ioctl argument type. - * - * \sa drmWaitVBlank(). - */ -union drm_wait_vblank { - struct drm_wait_vblank_request request; - struct drm_wait_vblank_reply reply; -}; - -#define _DRM_PRE_MODESET 1 -#define _DRM_POST_MODESET 2 - -/* - * DRM_IOCTL_MODESET_CTL ioctl argument type - * - * \sa drmModesetCtl(). - */ -struct drm_modeset_ctl { - __u32 crtc; - __u32 cmd; -}; - -/* - * DRM_IOCTL_AGP_ENABLE ioctl argument type. - * - * \sa drmAgpEnable(). - */ -struct drm_agp_mode { - unsigned long mode; /**< AGP mode */ -}; - -/* - * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. - * - * \sa drmAgpAlloc() and drmAgpFree(). - */ -struct drm_agp_buffer { - unsigned long size; /**< In bytes -- will round to page boundary */ - unsigned long handle; /**< Used for binding / unbinding */ - unsigned long type; /**< Type of memory to allocate */ - unsigned long physical; /**< Physical used by i810 */ -}; - -/* - * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. - * - * \sa drmAgpBind() and drmAgpUnbind(). - */ -struct drm_agp_binding { - unsigned long handle; /**< From drm_agp_buffer */ - unsigned long offset; /**< In bytes -- will round to page boundary */ -}; - -/* - * DRM_IOCTL_AGP_INFO ioctl argument type. - * - * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), - * drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(), - * drmAgpVendorId() and drmAgpDeviceId(). - */ -struct drm_agp_info { - int agp_version_major; - int agp_version_minor; - unsigned long mode; - unsigned long aperture_base; /* physical address */ - unsigned long aperture_size; /* bytes */ - unsigned long memory_allowed; /* bytes */ - unsigned long memory_used; - - /* PCI information */ - unsigned short id_vendor; - unsigned short id_device; -}; - -/* - * DRM_IOCTL_SG_ALLOC ioctl argument type. - */ -struct drm_scatter_gather { - unsigned long size; /**< In bytes -- will round to page boundary */ - unsigned long handle; /**< Used for mapping / unmapping */ -}; - -/* - * DRM_IOCTL_SET_VERSION ioctl argument type. - */ -struct drm_set_version { - int drm_di_major; - int drm_di_minor; - int drm_dd_major; - int drm_dd_minor; -}; - -/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ -struct drm_gem_close { - /** Handle of the object to be closed. */ - __u32 handle; - __u32 pad; -}; - -/* DRM_IOCTL_GEM_FLINK ioctl argument type */ -struct drm_gem_flink { - /** Handle for the object being named */ - __u32 handle; - - /** Returned global name */ - __u32 name; -}; - -/* DRM_IOCTL_GEM_OPEN ioctl argument type */ -struct drm_gem_open { - /** Name of object being opened */ - __u32 name; - - /** Returned handle for the object */ - __u32 handle; - - /** Returned size of the object */ - __u64 size; -}; - -/** - * DRM_CAP_DUMB_BUFFER - * - * If set to 1, the driver supports creating dumb buffers via the - * &DRM_IOCTL_MODE_CREATE_DUMB ioctl. - */ -#define DRM_CAP_DUMB_BUFFER 0x1 -/** - * DRM_CAP_VBLANK_HIGH_CRTC - * - * If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>` - * in the high bits of &drm_wait_vblank_request.type. - * - * Starting kernel version 2.6.39, this capability is always set to 1. - */ -#define DRM_CAP_VBLANK_HIGH_CRTC 0x2 -/** - * DRM_CAP_DUMB_PREFERRED_DEPTH - * - * The preferred bit depth for dumb buffers. - * - * The bit depth is the number of bits used to indicate the color of a single - * pixel excluding any padding. This is different from the number of bits per - * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per - * pixel. - * - * Note that this preference only applies to dumb buffers, it's irrelevant for - * other types of buffers. - */ -#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 -/** - * DRM_CAP_DUMB_PREFER_SHADOW - * - * If set to 1, the driver prefers userspace to render to a shadow buffer - * instead of directly rendering to a dumb buffer. For best speed, userspace - * should do streaming ordered memory copies into the dumb buffer and never - * read from it. - * - * Note that this preference only applies to dumb buffers, it's irrelevant for - * other types of buffers. - */ -#define DRM_CAP_DUMB_PREFER_SHADOW 0x4 -/** - * DRM_CAP_PRIME - * - * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT - * and &DRM_PRIME_CAP_EXPORT. - * - * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and - * &DRM_PRIME_CAP_EXPORT are always advertised. - * - * PRIME buffers are exposed as dma-buf file descriptors. - * See :ref:`prime_buffer_sharing`. - */ -#define DRM_CAP_PRIME 0x5 -/** - * DRM_PRIME_CAP_IMPORT - * - * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME - * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. - * - * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. - */ -#define DRM_PRIME_CAP_IMPORT 0x1 -/** - * DRM_PRIME_CAP_EXPORT - * - * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME - * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. - * - * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. - */ -#define DRM_PRIME_CAP_EXPORT 0x2 -/** - * DRM_CAP_TIMESTAMP_MONOTONIC - * - * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in - * struct drm_event_vblank. If set to 1, the kernel will report timestamps with - * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these - * clocks. - * - * Starting from kernel version 2.6.39, the default value for this capability - * is 1. Starting kernel version 4.15, this capability is always set to 1. - */ -#define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 -/** - * DRM_CAP_ASYNC_PAGE_FLIP - * - * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy - * page-flips. - */ -#define DRM_CAP_ASYNC_PAGE_FLIP 0x7 -/** - * DRM_CAP_CURSOR_WIDTH - * - * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid - * width x height combination for the hardware cursor. The intention is that a - * hardware agnostic userspace can query a cursor plane size to use. - * - * Note that the cross-driver contract is to merely return a valid size; - * drivers are free to attach another meaning on top, eg. i915 returns the - * maximum plane size. - */ -#define DRM_CAP_CURSOR_WIDTH 0x8 -/** - * DRM_CAP_CURSOR_HEIGHT - * - * See &DRM_CAP_CURSOR_WIDTH. - */ -#define DRM_CAP_CURSOR_HEIGHT 0x9 -/** - * DRM_CAP_ADDFB2_MODIFIERS - * - * If set to 1, the driver supports supplying modifiers in the - * &DRM_IOCTL_MODE_ADDFB2 ioctl. - */ -#define DRM_CAP_ADDFB2_MODIFIERS 0x10 -/** - * DRM_CAP_PAGE_FLIP_TARGET - * - * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and - * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in - * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP - * ioctl. - */ -#define DRM_CAP_PAGE_FLIP_TARGET 0x11 -/** - * DRM_CAP_CRTC_IN_VBLANK_EVENT - * - * If set to 1, the kernel supports reporting the CRTC ID in - * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and - * &DRM_EVENT_FLIP_COMPLETE events. - * - * Starting kernel version 4.12, this capability is always set to 1. - */ -#define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 -/** - * DRM_CAP_SYNCOBJ - * - * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`. - */ -#define DRM_CAP_SYNCOBJ 0x13 -/** - * DRM_CAP_SYNCOBJ_TIMELINE - * - * If set to 1, the driver supports timeline operations on sync objects. See - * :ref:`drm_sync_objects`. - */ -#define DRM_CAP_SYNCOBJ_TIMELINE 0x14 -/** - * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP - * - * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic - * commits. - */ -#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15 - -/* DRM_IOCTL_GET_CAP ioctl argument type */ -struct drm_get_cap { - __u64 capability; - __u64 value; -}; - -/** - * DRM_CLIENT_CAP_STEREO_3D - * - * If set to 1, the DRM core will expose the stereo 3D capabilities of the - * monitor by advertising the supported 3D layouts in the flags of struct - * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``. - * - * This capability is always supported for all drivers starting from kernel - * version 3.13. - */ -#define DRM_CLIENT_CAP_STEREO_3D 1 - -/** - * DRM_CLIENT_CAP_UNIVERSAL_PLANES - * - * If set to 1, the DRM core will expose all planes (overlay, primary, and - * cursor) to userspace. - * - * This capability has been introduced in kernel version 3.15. Starting from - * kernel version 3.17, this capability is always supported for all drivers. - */ -#define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 - -/** - * DRM_CLIENT_CAP_ATOMIC - * - * If set to 1, the DRM core will expose atomic properties to userspace. This - * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and - * &DRM_CLIENT_CAP_ASPECT_RATIO. - * - * If the driver doesn't support atomic mode-setting, enabling this capability - * will fail with -EOPNOTSUPP. - * - * This capability has been introduced in kernel version 4.0. Starting from - * kernel version 4.2, this capability is always supported for atomic-capable - * drivers. - */ -#define DRM_CLIENT_CAP_ATOMIC 3 - -/** - * DRM_CLIENT_CAP_ASPECT_RATIO - * - * If set to 1, the DRM core will provide aspect ratio information in modes. - * See ``DRM_MODE_FLAG_PIC_AR_*``. - * - * This capability is always supported for all drivers starting from kernel - * version 4.18. - */ -#define DRM_CLIENT_CAP_ASPECT_RATIO 4 - -/** - * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS - * - * If set to 1, the DRM core will expose special connectors to be used for - * writing back to memory the scene setup in the commit. The client must enable - * &DRM_CLIENT_CAP_ATOMIC first. - * - * This capability is always supported for atomic-capable drivers starting from - * kernel version 4.19. - */ -#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 - -/** - * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT - * - * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and - * virtualbox) have additional restrictions for cursor planes (thus - * making cursor planes on those drivers not truly universal,) e.g. - * they need cursor planes to act like one would expect from a mouse - * cursor and have correctly set hotspot properties. - * If this client cap is not set the DRM core will hide cursor plane on - * those virtualized drivers because not setting it implies that the - * client is not capable of dealing with those extra restictions. - * Clients which do set cursor hotspot and treat the cursor plane - * like a mouse cursor should set this property. - * The client must enable &DRM_CLIENT_CAP_ATOMIC first. - * - * Setting this property on drivers which do not special case - * cursor planes (i.e. non-virtualized drivers) will return - * EOPNOTSUPP, which can be used by userspace to gauge - * requirements of the hardware/drivers they're running on. - * - * This capability is always supported for atomic-capable virtualized - * drivers starting from kernel version 6.6. - */ -#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6 - -/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ -struct drm_set_client_cap { - __u64 capability; - __u64 value; -}; - -#define DRM_RDWR O_RDWR -#define DRM_CLOEXEC O_CLOEXEC -struct drm_prime_handle { - __u32 handle; - - /** Flags.. only applicable for handle->fd */ - __u32 flags; - - /** Returned dmabuf file descriptor */ - __s32 fd; -}; - -struct drm_syncobj_create { - __u32 handle; -#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) - __u32 flags; -}; - -struct drm_syncobj_destroy { - __u32 handle; - __u32 pad; -}; - -#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) -#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) -struct drm_syncobj_handle { - __u32 handle; - __u32 flags; - - __s32 fd; - __u32 pad; -}; - -struct drm_syncobj_transfer { - __u32 src_handle; - __u32 dst_handle; - __u64 src_point; - __u64 dst_point; - __u32 flags; - __u32 pad; -}; - -#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) -#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) -#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */ -#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */ -struct drm_syncobj_wait { - __u64 handles; - /* absolute timeout */ - __s64 timeout_nsec; - __u32 count_handles; - __u32 flags; - __u32 first_signaled; /* only valid when not waiting all */ - __u32 pad; - /** - * @deadline_nsec - fence deadline hint - * - * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing - * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is - * set. - */ - __u64 deadline_nsec; -}; - -struct drm_syncobj_timeline_wait { - __u64 handles; - /* wait on specific timeline point for every handles*/ - __u64 points; - /* absolute timeout */ - __s64 timeout_nsec; - __u32 count_handles; - __u32 flags; - __u32 first_signaled; /* only valid when not waiting all */ - __u32 pad; - /** - * @deadline_nsec - fence deadline hint - * - * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing - * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is - * set. - */ - __u64 deadline_nsec; -}; - -/** - * struct drm_syncobj_eventfd - * @handle: syncobj handle. - * @flags: Zero to wait for the point to be signalled, or - * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be - * available for the point. - * @point: syncobj timeline point (set to zero for binary syncobjs). - * @fd: Existing eventfd to sent events to. - * @pad: Must be zero. - * - * Register an eventfd to be signalled by a syncobj. The eventfd counter will - * be incremented by one. - */ -struct drm_syncobj_eventfd { - __u32 handle; - __u32 flags; - __u64 point; - __s32 fd; - __u32 pad; -}; - - -struct drm_syncobj_array { - __u64 handles; - __u32 count_handles; - __u32 pad; -}; - -#define DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED (1 << 0) /* last available point on timeline syncobj */ -struct drm_syncobj_timeline_array { - __u64 handles; - __u64 points; - __u32 count_handles; - __u32 flags; -}; - - -/* Query current scanout sequence number */ -struct drm_crtc_get_sequence { - __u32 crtc_id; /* requested crtc_id */ - __u32 active; /* return: crtc output is active */ - __u64 sequence; /* return: most recent vblank sequence */ - __s64 sequence_ns; /* return: most recent time of first pixel out */ -}; - -/* Queue event to be delivered at specified sequence. Time stamp marks - * when the first pixel of the refresh cycle leaves the display engine - * for the display - */ -#define DRM_CRTC_SEQUENCE_RELATIVE 0x00000001 /* sequence is relative to current */ -#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS 0x00000002 /* Use next sequence if we've missed */ - -struct drm_crtc_queue_sequence { - __u32 crtc_id; - __u32 flags; - __u64 sequence; /* on input, target sequence. on output, actual sequence */ - __u64 user_data; /* user data passed to event */ -}; - -#define DRM_CLIENT_NAME_MAX_LEN 64 -struct drm_set_client_name { - __u64 name_len; - __u64 name; -}; - - -#if defined(__cplusplus) -} -#endif - -#include "drm_mode.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -#define DRM_IOCTL_BASE 'd' -#define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr) -#define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type) -#define DRM_IOW(nr,type) _IOW(DRM_IOCTL_BASE,nr,type) -#define DRM_IOWR(nr,type) _IOWR(DRM_IOCTL_BASE,nr,type) - -#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version) -#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique) -#define DRM_IOCTL_GET_MAGIC DRM_IOR( 0x02, struct drm_auth) -#define DRM_IOCTL_IRQ_BUSID DRM_IOWR(0x03, struct drm_irq_busid) -#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map) -#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client) -#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) -#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) -#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl) -/** - * DRM_IOCTL_GEM_CLOSE - Close a GEM handle. - * - * GEM handles are not reference-counted by the kernel. User-space is - * responsible for managing their lifetime. For example, if user-space imports - * the same memory object twice on the same DRM file description, the same GEM - * handle is returned by both imports, and user-space needs to ensure - * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen - * when a memory object is allocated, then exported and imported again on the - * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception - * and always returns fresh new GEM handles even if an existing GEM handle - * already refers to the same memory object before the IOCTL is performed. - */ -#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) -#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) -#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) -#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap) -#define DRM_IOCTL_SET_CLIENT_CAP DRM_IOW( 0x0d, struct drm_set_client_cap) - -#define DRM_IOCTL_SET_UNIQUE DRM_IOW( 0x10, struct drm_unique) -#define DRM_IOCTL_AUTH_MAGIC DRM_IOW( 0x11, struct drm_auth) -#define DRM_IOCTL_BLOCK DRM_IOWR(0x12, struct drm_block) -#define DRM_IOCTL_UNBLOCK DRM_IOWR(0x13, struct drm_block) -#define DRM_IOCTL_CONTROL DRM_IOW( 0x14, struct drm_control) -#define DRM_IOCTL_ADD_MAP DRM_IOWR(0x15, struct drm_map) -#define DRM_IOCTL_ADD_BUFS DRM_IOWR(0x16, struct drm_buf_desc) -#define DRM_IOCTL_MARK_BUFS DRM_IOW( 0x17, struct drm_buf_desc) -#define DRM_IOCTL_INFO_BUFS DRM_IOWR(0x18, struct drm_buf_info) -#define DRM_IOCTL_MAP_BUFS DRM_IOWR(0x19, struct drm_buf_map) -#define DRM_IOCTL_FREE_BUFS DRM_IOW( 0x1a, struct drm_buf_free) - -#define DRM_IOCTL_RM_MAP DRM_IOW( 0x1b, struct drm_map) - -#define DRM_IOCTL_SET_SAREA_CTX DRM_IOW( 0x1c, struct drm_ctx_priv_map) -#define DRM_IOCTL_GET_SAREA_CTX DRM_IOWR(0x1d, struct drm_ctx_priv_map) - -#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e) -#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f) - -#define DRM_IOCTL_ADD_CTX DRM_IOWR(0x20, struct drm_ctx) -#define DRM_IOCTL_RM_CTX DRM_IOWR(0x21, struct drm_ctx) -#define DRM_IOCTL_MOD_CTX DRM_IOW( 0x22, struct drm_ctx) -#define DRM_IOCTL_GET_CTX DRM_IOWR(0x23, struct drm_ctx) -#define DRM_IOCTL_SWITCH_CTX DRM_IOW( 0x24, struct drm_ctx) -#define DRM_IOCTL_NEW_CTX DRM_IOW( 0x25, struct drm_ctx) -#define DRM_IOCTL_RES_CTX DRM_IOWR(0x26, struct drm_ctx_res) -#define DRM_IOCTL_ADD_DRAW DRM_IOWR(0x27, struct drm_draw) -#define DRM_IOCTL_RM_DRAW DRM_IOWR(0x28, struct drm_draw) -#define DRM_IOCTL_DMA DRM_IOWR(0x29, struct drm_dma) -#define DRM_IOCTL_LOCK DRM_IOW( 0x2a, struct drm_lock) -#define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock) -#define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock) - -/** - * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD. - * - * User-space sets &drm_prime_handle.handle with the GEM handle to export and - * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in - * &drm_prime_handle.fd. - * - * The export can fail for any driver-specific reason, e.g. because export is - * not supported for this specific GEM handle (but might be for others). - * - * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT. - */ -#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) -/** - * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle. - * - * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to - * import, and gets back a GEM handle in &drm_prime_handle.handle. - * &drm_prime_handle.flags is unused. - * - * If an existing GEM handle refers to the memory object backing the DMA-BUF, - * that GEM handle is returned. Therefore user-space which needs to handle - * arbitrary DMA-BUFs must have a user-space lookup data structure to manually - * reference-count duplicated GEM handles. For more information see - * &DRM_IOCTL_GEM_CLOSE. - * - * The import can fail for any driver-specific reason, e.g. because import is - * only supported for DMA-BUFs allocated on this DRM device. - * - * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT. - */ -#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) - -#define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30) -#define DRM_IOCTL_AGP_RELEASE DRM_IO( 0x31) -#define DRM_IOCTL_AGP_ENABLE DRM_IOW( 0x32, struct drm_agp_mode) -#define DRM_IOCTL_AGP_INFO DRM_IOR( 0x33, struct drm_agp_info) -#define DRM_IOCTL_AGP_ALLOC DRM_IOWR(0x34, struct drm_agp_buffer) -#define DRM_IOCTL_AGP_FREE DRM_IOW( 0x35, struct drm_agp_buffer) -#define DRM_IOCTL_AGP_BIND DRM_IOW( 0x36, struct drm_agp_binding) -#define DRM_IOCTL_AGP_UNBIND DRM_IOW( 0x37, struct drm_agp_binding) - -#define DRM_IOCTL_SG_ALLOC DRM_IOWR(0x38, struct drm_scatter_gather) -#define DRM_IOCTL_SG_FREE DRM_IOW( 0x39, struct drm_scatter_gather) - -#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank) - -#define DRM_IOCTL_CRTC_GET_SEQUENCE DRM_IOWR(0x3b, struct drm_crtc_get_sequence) -#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE DRM_IOWR(0x3c, struct drm_crtc_queue_sequence) - -#define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw) - -#define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res) -#define DRM_IOCTL_MODE_GETCRTC DRM_IOWR(0xA1, struct drm_mode_crtc) -#define DRM_IOCTL_MODE_SETCRTC DRM_IOWR(0xA2, struct drm_mode_crtc) -#define DRM_IOCTL_MODE_CURSOR DRM_IOWR(0xA3, struct drm_mode_cursor) -#define DRM_IOCTL_MODE_GETGAMMA DRM_IOWR(0xA4, struct drm_mode_crtc_lut) -#define DRM_IOCTL_MODE_SETGAMMA DRM_IOWR(0xA5, struct drm_mode_crtc_lut) -#define DRM_IOCTL_MODE_GETENCODER DRM_IOWR(0xA6, struct drm_mode_get_encoder) -#define DRM_IOCTL_MODE_GETCONNECTOR DRM_IOWR(0xA7, struct drm_mode_get_connector) -#define DRM_IOCTL_MODE_ATTACHMODE DRM_IOWR(0xA8, struct drm_mode_mode_cmd) /* deprecated (never worked) */ -#define DRM_IOCTL_MODE_DETACHMODE DRM_IOWR(0xA9, struct drm_mode_mode_cmd) /* deprecated (never worked) */ - -#define DRM_IOCTL_MODE_GETPROPERTY DRM_IOWR(0xAA, struct drm_mode_get_property) -#define DRM_IOCTL_MODE_SETPROPERTY DRM_IOWR(0xAB, struct drm_mode_connector_set_property) -#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) -#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) -#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) -/** - * DRM_IOCTL_MODE_RMFB - Remove a framebuffer. - * - * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL - * argument is a framebuffer object ID. - * - * Warning: removing a framebuffer currently in-use on an enabled plane will - * disable that plane. The CRTC the plane is linked to may also be disabled - * (depending on driver capabilities). - */ -#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) -#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) -#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) - -/** - * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object. - * - * KMS dumb buffers provide a very primitive way to allocate a buffer object - * suitable for scanout and map it for software rendering. KMS dumb buffers are - * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb - * buffers are not suitable to be displayed on any other device than the KMS - * device where they were allocated from. Also see - * :ref:`kms_dumb_buffer_objects`. - * - * The IOCTL argument is a struct drm_mode_create_dumb. - * - * User-space is expected to create a KMS dumb buffer via this IOCTL, then add - * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via - * &DRM_IOCTL_MODE_MAP_DUMB. - * - * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported. - * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate - * driver preferences for dumb buffers. - */ -#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb) -#define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb) -#define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb) -#define DRM_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xB5, struct drm_mode_get_plane_res) -#define DRM_IOCTL_MODE_GETPLANE DRM_IOWR(0xB6, struct drm_mode_get_plane) -#define DRM_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct drm_mode_set_plane) -#define DRM_IOCTL_MODE_ADDFB2 DRM_IOWR(0xB8, struct drm_mode_fb_cmd2) -#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES DRM_IOWR(0xB9, struct drm_mode_obj_get_properties) -#define DRM_IOCTL_MODE_OBJ_SETPROPERTY DRM_IOWR(0xBA, struct drm_mode_obj_set_property) -#define DRM_IOCTL_MODE_CURSOR2 DRM_IOWR(0xBB, struct drm_mode_cursor2) -#define DRM_IOCTL_MODE_ATOMIC DRM_IOWR(0xBC, struct drm_mode_atomic) -#define DRM_IOCTL_MODE_CREATEPROPBLOB DRM_IOWR(0xBD, struct drm_mode_create_blob) -#define DRM_IOCTL_MODE_DESTROYPROPBLOB DRM_IOWR(0xBE, struct drm_mode_destroy_blob) - -#define DRM_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct drm_syncobj_create) -#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) -#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) -#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) -#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) -#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) -#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) - -#define DRM_IOCTL_MODE_CREATE_LEASE DRM_IOWR(0xC6, struct drm_mode_create_lease) -#define DRM_IOCTL_MODE_LIST_LESSEES DRM_IOWR(0xC7, struct drm_mode_list_lessees) -#define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease) -#define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease) - -#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) -#define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct drm_syncobj_timeline_array) -#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) -#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) - -/** - * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. - * - * This queries metadata about a framebuffer. User-space fills - * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the - * struct as the output. - * - * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles - * will be filled with GEM buffer handles. Fresh new GEM handles are always - * returned, even if another GEM handle referring to the same memory object - * already exists on the DRM file description. The caller is responsible for - * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same - * new handle will be returned for multiple planes in case they use the same - * memory object. Planes are valid until one has a zero handle -- this can be - * used to compute the number of planes. - * - * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid - * until one has a zero &drm_mode_fb_cmd2.pitches. - * - * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set - * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the - * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. - * - * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space - * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately - * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not - * double-close handles which are specified multiple times in the array. - */ -#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) - -#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) - -/** - * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer. - * - * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL - * argument is a framebuffer object ID. - * - * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable - * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept - * alive. When the plane no longer uses the framebuffer (because the - * framebuffer is replaced with another one, or the plane is disabled), the - * framebuffer is cleaned up. - * - * This is useful to implement flicker-free transitions between two processes. - * - * Depending on the threat model, user-space may want to ensure that the - * framebuffer doesn't expose any sensitive user information: closed - * framebuffers attached to a plane can be read back by the next DRM master. - */ -#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) - -/** - * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file - * - * Having a name allows for easier tracking and debugging. - * The length of the name (without null ending char) must be - * <= DRM_CLIENT_NAME_MAX_LEN. - * The call will fail if the name contains whitespaces or non-printable chars. - */ -#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) - -/* - * Device specific ioctls should only be in their respective headers - * The device specific ioctl range is from 0x40 to 0x9f. - * Generic IOCTLS restart at 0xA0. - * - * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and - * drmCommandReadWrite(). - */ -#define DRM_COMMAND_BASE 0x40 -#define DRM_COMMAND_END 0xA0 - -/** - * struct drm_event - Header for DRM events - * @type: event type. - * @length: total number of payload bytes (including header). - * - * This struct is a header for events written back to user-space on the DRM FD. - * A read on the DRM FD will always only return complete events: e.g. if the - * read buffer is 100 bytes large and there are two 64 byte events pending, - * only one will be returned. - * - * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and - * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK, - * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE. - */ -struct drm_event { - __u32 type; - __u32 length; -}; - -/** - * DRM_EVENT_VBLANK - vertical blanking event - * - * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the - * &_DRM_VBLANK_EVENT flag set. - * - * The event payload is a struct drm_event_vblank. - */ -#define DRM_EVENT_VBLANK 0x01 -/** - * DRM_EVENT_FLIP_COMPLETE - page-flip completion event - * - * This event is sent in response to an atomic commit or legacy page-flip with - * the &DRM_MODE_PAGE_FLIP_EVENT flag set. - * - * The event payload is a struct drm_event_vblank. - */ -#define DRM_EVENT_FLIP_COMPLETE 0x02 -/** - * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event - * - * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE. - * - * The event payload is a struct drm_event_crtc_sequence. - */ -#define DRM_EVENT_CRTC_SEQUENCE 0x03 - -struct drm_event_vblank { - struct drm_event base; - __u64 user_data; - __u32 tv_sec; - __u32 tv_usec; - __u32 sequence; - __u32 crtc_id; /* 0 on older kernels that do not support this */ -}; - -/* Event delivered at sequence. Time stamp marks when the first pixel - * of the refresh cycle leaves the display engine for the display - */ -struct drm_event_crtc_sequence { - struct drm_event base; - __u64 user_data; - __s64 time_ns; - __u64 sequence; -}; - -/* typedef area */ -#ifndef __KERNEL__ -typedef struct drm_clip_rect drm_clip_rect_t; -typedef struct drm_drawable_info drm_drawable_info_t; -typedef struct drm_tex_region drm_tex_region_t; -typedef struct drm_hw_lock drm_hw_lock_t; -typedef struct drm_version drm_version_t; -typedef struct drm_unique drm_unique_t; -typedef struct drm_list drm_list_t; -typedef struct drm_block drm_block_t; -typedef struct drm_control drm_control_t; -typedef enum drm_map_type drm_map_type_t; -typedef enum drm_map_flags drm_map_flags_t; -typedef struct drm_ctx_priv_map drm_ctx_priv_map_t; -typedef struct drm_map drm_map_t; -typedef struct drm_client drm_client_t; -typedef enum drm_stat_type drm_stat_type_t; -typedef struct drm_stats drm_stats_t; -typedef enum drm_lock_flags drm_lock_flags_t; -typedef struct drm_lock drm_lock_t; -typedef enum drm_dma_flags drm_dma_flags_t; -typedef struct drm_buf_desc drm_buf_desc_t; -typedef struct drm_buf_info drm_buf_info_t; -typedef struct drm_buf_free drm_buf_free_t; -typedef struct drm_buf_pub drm_buf_pub_t; -typedef struct drm_buf_map drm_buf_map_t; -typedef struct drm_dma drm_dma_t; -typedef union drm_wait_vblank drm_wait_vblank_t; -typedef struct drm_agp_mode drm_agp_mode_t; -typedef enum drm_ctx_flags drm_ctx_flags_t; -typedef struct drm_ctx drm_ctx_t; -typedef struct drm_ctx_res drm_ctx_res_t; -typedef struct drm_draw drm_draw_t; -typedef struct drm_update_draw drm_update_draw_t; -typedef struct drm_auth drm_auth_t; -typedef struct drm_irq_busid drm_irq_busid_t; -typedef enum drm_vblank_seq_type drm_vblank_seq_type_t; - -typedef struct drm_agp_buffer drm_agp_buffer_t; -typedef struct drm_agp_binding drm_agp_binding_t; -typedef struct drm_agp_info drm_agp_info_t; -typedef struct drm_scatter_gather drm_scatter_gather_t; -typedef struct drm_set_version drm_set_version_t; -#endif - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h deleted file mode 100644 index 535cb68fdb5c..000000000000 --- a/tools/include/uapi/drm/i915_drm.h +++ /dev/null @@ -1,3916 +0,0 @@ -/* - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef _UAPI_I915_DRM_H_ -#define _UAPI_I915_DRM_H_ - -#include "drm.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -/* Please note that modifications to all structs defined here are - * subject to backwards-compatibility constraints. - */ - -/** - * DOC: uevents generated by i915 on its device node - * - * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch - * event from the GPU L3 cache. Additional information supplied is ROW, - * BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep - * track of these events, and if a specific cache-line seems to have a - * persistent error, remap it with the L3 remapping tool supplied in - * intel-gpu-tools. The value supplied with the event is always 1. - * - * I915_ERROR_UEVENT - Generated upon error detection, currently only via - * hangcheck. The error detection event is a good indicator of when things - * began to go badly. The value supplied with the event is a 1 upon error - * detection, and a 0 upon reset completion, signifying no more error - * exists. NOTE: Disabling hangcheck or reset via module parameter will - * cause the related events to not be seen. - * - * I915_RESET_UEVENT - Event is generated just before an attempt to reset the - * GPU. The value supplied with the event is always 1. NOTE: Disable - * reset via module parameter will cause this event to not be seen. - */ -#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR" -#define I915_ERROR_UEVENT "ERROR" -#define I915_RESET_UEVENT "RESET" - -/** - * struct i915_user_extension - Base class for defining a chain of extensions - * - * Many interfaces need to grow over time. In most cases we can simply - * extend the struct and have userspace pass in more data. Another option, - * as demonstrated by Vulkan's approach to providing extensions for forward - * and backward compatibility, is to use a list of optional structs to - * provide those extra details. - * - * The key advantage to using an extension chain is that it allows us to - * redefine the interface more easily than an ever growing struct of - * increasing complexity, and for large parts of that interface to be - * entirely optional. The downside is more pointer chasing; chasing across - * the __user boundary with pointers encapsulated inside u64. - * - * Example chaining: - * - * .. code-block:: C - * - * struct i915_user_extension ext3 { - * .next_extension = 0, // end - * .name = ..., - * }; - * struct i915_user_extension ext2 { - * .next_extension = (uintptr_t)&ext3, - * .name = ..., - * }; - * struct i915_user_extension ext1 { - * .next_extension = (uintptr_t)&ext2, - * .name = ..., - * }; - * - * Typically the struct i915_user_extension would be embedded in some uAPI - * struct, and in this case we would feed it the head of the chain(i.e ext1), - * which would then apply all of the above extensions. - * - */ -struct i915_user_extension { - /** - * @next_extension: - * - * Pointer to the next struct i915_user_extension, or zero if the end. - */ - __u64 next_extension; - /** - * @name: Name of the extension. - * - * Note that the name here is just some integer. - * - * Also note that the name space for this is not global for the whole - * driver, but rather its scope/meaning is limited to the specific piece - * of uAPI which has embedded the struct i915_user_extension. - */ - __u32 name; - /** - * @flags: MBZ - * - * All undefined bits must be zero. - */ - __u32 flags; - /** - * @rsvd: MBZ - * - * Reserved for future use; must be zero. - */ - __u32 rsvd[4]; -}; - -/* - * MOCS indexes used for GPU surfaces, defining the cacheability of the - * surface data and the coherency for this data wrt. CPU vs. GPU accesses. - */ -enum i915_mocs_table_index { - /* - * Not cached anywhere, coherency between CPU and GPU accesses is - * guaranteed. - */ - I915_MOCS_UNCACHED, - /* - * Cacheability and coherency controlled by the kernel automatically - * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current - * usage of the surface (used for display scanout or not). - */ - I915_MOCS_PTE, - /* - * Cached in all GPU caches available on the platform. - * Coherency between CPU and GPU accesses to the surface is not - * guaranteed without extra synchronization. - */ - I915_MOCS_CACHED, -}; - -/** - * enum drm_i915_gem_engine_class - uapi engine type enumeration - * - * Different engines serve different roles, and there may be more than one - * engine serving each role. This enum provides a classification of the role - * of the engine, which may be used when requesting operations to be performed - * on a certain subset of engines, or for providing information about that - * group. - */ -enum drm_i915_gem_engine_class { - /** - * @I915_ENGINE_CLASS_RENDER: - * - * Render engines support instructions used for 3D, Compute (GPGPU), - * and programmable media workloads. These instructions fetch data and - * dispatch individual work items to threads that operate in parallel. - * The threads run small programs (called "kernels" or "shaders") on - * the GPU's execution units (EUs). - */ - I915_ENGINE_CLASS_RENDER = 0, - - /** - * @I915_ENGINE_CLASS_COPY: - * - * Copy engines (also referred to as "blitters") support instructions - * that move blocks of data from one location in memory to another, - * or that fill a specified location of memory with fixed data. - * Copy engines can perform pre-defined logical or bitwise operations - * on the source, destination, or pattern data. - */ - I915_ENGINE_CLASS_COPY = 1, - - /** - * @I915_ENGINE_CLASS_VIDEO: - * - * Video engines (also referred to as "bit stream decode" (BSD) or - * "vdbox") support instructions that perform fixed-function media - * decode and encode. - */ - I915_ENGINE_CLASS_VIDEO = 2, - - /** - * @I915_ENGINE_CLASS_VIDEO_ENHANCE: - * - * Video enhancement engines (also referred to as "vebox") support - * instructions related to image enhancement. - */ - I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, - - /** - * @I915_ENGINE_CLASS_COMPUTE: - * - * Compute engines support a subset of the instructions available - * on render engines: compute engines support Compute (GPGPU) and - * programmable media workloads, but do not support the 3D pipeline. - */ - I915_ENGINE_CLASS_COMPUTE = 4, - - /* Values in this enum should be kept compact. */ - - /** - * @I915_ENGINE_CLASS_INVALID: - * - * Placeholder value to represent an invalid engine class assignment. - */ - I915_ENGINE_CLASS_INVALID = -1 -}; - -/** - * struct i915_engine_class_instance - Engine class/instance identifier - * - * There may be more than one engine fulfilling any role within the system. - * Each engine of a class is given a unique instance number and therefore - * any engine can be specified by its class:instance tuplet. APIs that allow - * access to any engine in the system will use struct i915_engine_class_instance - * for this identification. - */ -struct i915_engine_class_instance { - /** - * @engine_class: - * - * Engine class from enum drm_i915_gem_engine_class - */ - __u16 engine_class; -#define I915_ENGINE_CLASS_INVALID_NONE -1 -#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2 - - /** - * @engine_instance: - * - * Engine instance. - */ - __u16 engine_instance; -}; - -/** - * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 - * - */ - -enum drm_i915_pmu_engine_sample { - I915_SAMPLE_BUSY = 0, - I915_SAMPLE_WAIT = 1, - I915_SAMPLE_SEMA = 2 -}; - -#define I915_PMU_SAMPLE_BITS (4) -#define I915_PMU_SAMPLE_MASK (0xf) -#define I915_PMU_SAMPLE_INSTANCE_BITS (8) -#define I915_PMU_CLASS_SHIFT \ - (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS) - -#define __I915_PMU_ENGINE(class, instance, sample) \ - ((class) << I915_PMU_CLASS_SHIFT | \ - (instance) << I915_PMU_SAMPLE_BITS | \ - (sample)) - -#define I915_PMU_ENGINE_BUSY(class, instance) \ - __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY) - -#define I915_PMU_ENGINE_WAIT(class, instance) \ - __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT) - -#define I915_PMU_ENGINE_SEMA(class, instance) \ - __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) - -/* - * Top 4 bits of every non-engine counter are GT id. - */ -#define __I915_PMU_GT_SHIFT (60) - -#define ___I915_PMU_OTHER(gt, x) \ - (((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \ - ((__u64)(gt) << __I915_PMU_GT_SHIFT)) - -#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x) - -#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) -#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) -#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) -#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) -#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) - -#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY - -#define __I915_PMU_ACTUAL_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 0) -#define __I915_PMU_REQUESTED_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 1) -#define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2) -#define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3) -#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt) ___I915_PMU_OTHER(gt, 4) - -/* Each region is a minimum of 16k, and there are at most 255 of them. - */ -#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use - * of chars for next/prev indices */ -#define I915_LOG_MIN_TEX_REGION_SIZE 14 - -typedef struct _drm_i915_init { - enum { - I915_INIT_DMA = 0x01, - I915_CLEANUP_DMA = 0x02, - I915_RESUME_DMA = 0x03 - } func; - unsigned int mmio_offset; - int sarea_priv_offset; - unsigned int ring_start; - unsigned int ring_end; - unsigned int ring_size; - unsigned int front_offset; - unsigned int back_offset; - unsigned int depth_offset; - unsigned int w; - unsigned int h; - unsigned int pitch; - unsigned int pitch_bits; - unsigned int back_pitch; - unsigned int depth_pitch; - unsigned int cpp; - unsigned int chipset; -} drm_i915_init_t; - -typedef struct _drm_i915_sarea { - struct drm_tex_region texList[I915_NR_TEX_REGIONS + 1]; - int last_upload; /* last time texture was uploaded */ - int last_enqueue; /* last time a buffer was enqueued */ - int last_dispatch; /* age of the most recently dispatched buffer */ - int ctxOwner; /* last context to upload state */ - int texAge; - int pf_enabled; /* is pageflipping allowed? */ - int pf_active; - int pf_current_page; /* which buffer is being displayed? */ - int perf_boxes; /* performance boxes to be displayed */ - int width, height; /* screen size in pixels */ - - drm_handle_t front_handle; - int front_offset; - int front_size; - - drm_handle_t back_handle; - int back_offset; - int back_size; - - drm_handle_t depth_handle; - int depth_offset; - int depth_size; - - drm_handle_t tex_handle; - int tex_offset; - int tex_size; - int log_tex_granularity; - int pitch; - int rotation; /* 0, 90, 180 or 270 */ - int rotated_offset; - int rotated_size; - int rotated_pitch; - int virtualX, virtualY; - - unsigned int front_tiled; - unsigned int back_tiled; - unsigned int depth_tiled; - unsigned int rotated_tiled; - unsigned int rotated2_tiled; - - int pipeA_x; - int pipeA_y; - int pipeA_w; - int pipeA_h; - int pipeB_x; - int pipeB_y; - int pipeB_w; - int pipeB_h; - - /* fill out some space for old userspace triple buffer */ - drm_handle_t unused_handle; - __u32 unused1, unused2, unused3; - - /* buffer object handles for static buffers. May change - * over the lifetime of the client. - */ - __u32 front_bo_handle; - __u32 back_bo_handle; - __u32 unused_bo_handle; - __u32 depth_bo_handle; - -} drm_i915_sarea_t; - -/* due to userspace building against these headers we need some compat here */ -#define planeA_x pipeA_x -#define planeA_y pipeA_y -#define planeA_w pipeA_w -#define planeA_h pipeA_h -#define planeB_x pipeB_x -#define planeB_y pipeB_y -#define planeB_w pipeB_w -#define planeB_h pipeB_h - -/* Flags for perf_boxes - */ -#define I915_BOX_RING_EMPTY 0x1 -#define I915_BOX_FLIP 0x2 -#define I915_BOX_WAIT 0x4 -#define I915_BOX_TEXTURE_LOAD 0x8 -#define I915_BOX_LOST_CONTEXT 0x10 - -/* - * i915 specific ioctls. - * - * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie - * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset - * against DRM_COMMAND_BASE and should be between [0x0, 0x60). - */ -#define DRM_I915_INIT 0x00 -#define DRM_I915_FLUSH 0x01 -#define DRM_I915_FLIP 0x02 -#define DRM_I915_BATCHBUFFER 0x03 -#define DRM_I915_IRQ_EMIT 0x04 -#define DRM_I915_IRQ_WAIT 0x05 -#define DRM_I915_GETPARAM 0x06 -#define DRM_I915_SETPARAM 0x07 -#define DRM_I915_ALLOC 0x08 -#define DRM_I915_FREE 0x09 -#define DRM_I915_INIT_HEAP 0x0a -#define DRM_I915_CMDBUFFER 0x0b -#define DRM_I915_DESTROY_HEAP 0x0c -#define DRM_I915_SET_VBLANK_PIPE 0x0d -#define DRM_I915_GET_VBLANK_PIPE 0x0e -#define DRM_I915_VBLANK_SWAP 0x0f -#define DRM_I915_HWS_ADDR 0x11 -#define DRM_I915_GEM_INIT 0x13 -#define DRM_I915_GEM_EXECBUFFER 0x14 -#define DRM_I915_GEM_PIN 0x15 -#define DRM_I915_GEM_UNPIN 0x16 -#define DRM_I915_GEM_BUSY 0x17 -#define DRM_I915_GEM_THROTTLE 0x18 -#define DRM_I915_GEM_ENTERVT 0x19 -#define DRM_I915_GEM_LEAVEVT 0x1a -#define DRM_I915_GEM_CREATE 0x1b -#define DRM_I915_GEM_PREAD 0x1c -#define DRM_I915_GEM_PWRITE 0x1d -#define DRM_I915_GEM_MMAP 0x1e -#define DRM_I915_GEM_SET_DOMAIN 0x1f -#define DRM_I915_GEM_SW_FINISH 0x20 -#define DRM_I915_GEM_SET_TILING 0x21 -#define DRM_I915_GEM_GET_TILING 0x22 -#define DRM_I915_GEM_GET_APERTURE 0x23 -#define DRM_I915_GEM_MMAP_GTT 0x24 -#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25 -#define DRM_I915_GEM_MADVISE 0x26 -#define DRM_I915_OVERLAY_PUT_IMAGE 0x27 -#define DRM_I915_OVERLAY_ATTRS 0x28 -#define DRM_I915_GEM_EXECBUFFER2 0x29 -#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2 -#define DRM_I915_GET_SPRITE_COLORKEY 0x2a -#define DRM_I915_SET_SPRITE_COLORKEY 0x2b -#define DRM_I915_GEM_WAIT 0x2c -#define DRM_I915_GEM_CONTEXT_CREATE 0x2d -#define DRM_I915_GEM_CONTEXT_DESTROY 0x2e -#define DRM_I915_GEM_SET_CACHING 0x2f -#define DRM_I915_GEM_GET_CACHING 0x30 -#define DRM_I915_REG_READ 0x31 -#define DRM_I915_GET_RESET_STATS 0x32 -#define DRM_I915_GEM_USERPTR 0x33 -#define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 -#define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 -#define DRM_I915_PERF_OPEN 0x36 -#define DRM_I915_PERF_ADD_CONFIG 0x37 -#define DRM_I915_PERF_REMOVE_CONFIG 0x38 -#define DRM_I915_QUERY 0x39 -#define DRM_I915_GEM_VM_CREATE 0x3a -#define DRM_I915_GEM_VM_DESTROY 0x3b -#define DRM_I915_GEM_CREATE_EXT 0x3c -/* Must be kept compact -- no holes */ - -#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) -#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) -#define DRM_IOCTL_I915_FLIP DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLIP) -#define DRM_IOCTL_I915_BATCHBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_I915_BATCHBUFFER, drm_i915_batchbuffer_t) -#define DRM_IOCTL_I915_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_IRQ_EMIT, drm_i915_irq_emit_t) -#define DRM_IOCTL_I915_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_IRQ_WAIT, drm_i915_irq_wait_t) -#define DRM_IOCTL_I915_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GETPARAM, drm_i915_getparam_t) -#define DRM_IOCTL_I915_SETPARAM DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SETPARAM, drm_i915_setparam_t) -#define DRM_IOCTL_I915_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_ALLOC, drm_i915_mem_alloc_t) -#define DRM_IOCTL_I915_FREE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_FREE, drm_i915_mem_free_t) -#define DRM_IOCTL_I915_INIT_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT_HEAP, drm_i915_mem_init_heap_t) -#define DRM_IOCTL_I915_CMDBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_I915_CMDBUFFER, drm_i915_cmdbuffer_t) -#define DRM_IOCTL_I915_DESTROY_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_I915_DESTROY_HEAP, drm_i915_mem_destroy_heap_t) -#define DRM_IOCTL_I915_SET_VBLANK_PIPE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, drm_i915_vblank_pipe_t) -#define DRM_IOCTL_I915_GET_VBLANK_PIPE DRM_IOR( DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, drm_i915_vblank_pipe_t) -#define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t) -#define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init) -#define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) -#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) -#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) -#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2) -#define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) -#define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) -#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) -#define DRM_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_SET_CACHING, struct drm_i915_gem_caching) -#define DRM_IOCTL_I915_GEM_GET_CACHING DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHING, struct drm_i915_gem_caching) -#define DRM_IOCTL_I915_GEM_THROTTLE DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE) -#define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT) -#define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT) -#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) -#define DRM_IOCTL_I915_GEM_CREATE_EXT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE_EXT, struct drm_i915_gem_create_ext) -#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) -#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) -#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) -#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt) -#define DRM_IOCTL_I915_GEM_MMAP_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_offset) -#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain) -#define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish) -#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling) -#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) -#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) -#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_i915_get_pipe_from_crtc_id) -#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) -#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_I915_OVERLAY_PUT_IMAGE, struct drm_intel_overlay_put_image) -#define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs) -#define DRM_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) -#define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) -#define DRM_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait) -#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create) -#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext) -#define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy) -#define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read) -#define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats) -#define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr) -#define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) -#define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) -#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) -#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) -#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) -#define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query) -#define DRM_IOCTL_I915_GEM_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control) -#define DRM_IOCTL_I915_GEM_VM_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control) - -/* Allow drivers to submit batchbuffers directly to hardware, relying - * on the security mechanisms provided by hardware. - */ -typedef struct drm_i915_batchbuffer { - int start; /* agp offset */ - int used; /* nr bytes in use */ - int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ - int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */ - int num_cliprects; /* mulitpass with multiple cliprects? */ - struct drm_clip_rect __user *cliprects; /* pointer to userspace cliprects */ -} drm_i915_batchbuffer_t; - -/* As above, but pass a pointer to userspace buffer which can be - * validated by the kernel prior to sending to hardware. - */ -typedef struct _drm_i915_cmdbuffer { - char __user *buf; /* pointer to userspace command buffer */ - int sz; /* nr bytes in buf */ - int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ - int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */ - int num_cliprects; /* mulitpass with multiple cliprects? */ - struct drm_clip_rect __user *cliprects; /* pointer to userspace cliprects */ -} drm_i915_cmdbuffer_t; - -/* Userspace can request & wait on irq's: - */ -typedef struct drm_i915_irq_emit { - int __user *irq_seq; -} drm_i915_irq_emit_t; - -typedef struct drm_i915_irq_wait { - int irq_seq; -} drm_i915_irq_wait_t; - -/* - * Different modes of per-process Graphics Translation Table, - * see I915_PARAM_HAS_ALIASING_PPGTT - */ -#define I915_GEM_PPGTT_NONE 0 -#define I915_GEM_PPGTT_ALIASING 1 -#define I915_GEM_PPGTT_FULL 2 - -/* Ioctl to query kernel params: - */ -#define I915_PARAM_IRQ_ACTIVE 1 -#define I915_PARAM_ALLOW_BATCHBUFFER 2 -#define I915_PARAM_LAST_DISPATCH 3 -#define I915_PARAM_CHIPSET_ID 4 -#define I915_PARAM_HAS_GEM 5 -#define I915_PARAM_NUM_FENCES_AVAIL 6 -#define I915_PARAM_HAS_OVERLAY 7 -#define I915_PARAM_HAS_PAGEFLIPPING 8 -#define I915_PARAM_HAS_EXECBUF2 9 -#define I915_PARAM_HAS_BSD 10 -#define I915_PARAM_HAS_BLT 11 -#define I915_PARAM_HAS_RELAXED_FENCING 12 -#define I915_PARAM_HAS_COHERENT_RINGS 13 -#define I915_PARAM_HAS_EXEC_CONSTANTS 14 -#define I915_PARAM_HAS_RELAXED_DELTA 15 -#define I915_PARAM_HAS_GEN7_SOL_RESET 16 -#define I915_PARAM_HAS_LLC 17 -#define I915_PARAM_HAS_ALIASING_PPGTT 18 -#define I915_PARAM_HAS_WAIT_TIMEOUT 19 -#define I915_PARAM_HAS_SEMAPHORES 20 -#define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21 -#define I915_PARAM_HAS_VEBOX 22 -#define I915_PARAM_HAS_SECURE_BATCHES 23 -#define I915_PARAM_HAS_PINNED_BATCHES 24 -#define I915_PARAM_HAS_EXEC_NO_RELOC 25 -#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 -#define I915_PARAM_HAS_WT 27 -#define I915_PARAM_CMD_PARSER_VERSION 28 -#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29 -#define I915_PARAM_MMAP_VERSION 30 -#define I915_PARAM_HAS_BSD2 31 -#define I915_PARAM_REVISION 32 -#define I915_PARAM_SUBSLICE_TOTAL 33 -#define I915_PARAM_EU_TOTAL 34 -#define I915_PARAM_HAS_GPU_RESET 35 -#define I915_PARAM_HAS_RESOURCE_STREAMER 36 -#define I915_PARAM_HAS_EXEC_SOFTPIN 37 -#define I915_PARAM_HAS_POOLED_EU 38 -#define I915_PARAM_MIN_EU_IN_POOL 39 -#define I915_PARAM_MMAP_GTT_VERSION 40 - -/* - * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution - * priorities and the driver will attempt to execute batches in priority order. - * The param returns a capability bitmask, nonzero implies that the scheduler - * is enabled, with different features present according to the mask. - * - * The initial priority for each batch is supplied by the context and is - * controlled via I915_CONTEXT_PARAM_PRIORITY. - */ -#define I915_PARAM_HAS_SCHEDULER 41 -#define I915_SCHEDULER_CAP_ENABLED (1ul << 0) -#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) -#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) -#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) -#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) -/* - * Indicates the 2k user priority levels are statically mapped into 3 buckets as - * follows: - * - * -1k to -1 Low priority - * 0 Normal priority - * 1 to 1k Highest priority - */ -#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5) - -/* - * Query the status of HuC load. - * - * The query can fail in the following scenarios with the listed error codes: - * -ENODEV if HuC is not present on this platform, - * -EOPNOTSUPP if HuC firmware usage is disabled, - * -ENOPKG if HuC firmware fetch failed, - * -ENOEXEC if HuC firmware is invalid or mismatched, - * -ENOMEM if i915 failed to prepare the FW objects for transfer to the uC, - * -EIO if the FW transfer or the FW authentication failed. - * - * If the IOCTL is successful, the returned parameter will be set to one of the - * following values: - * * 0 if HuC firmware load is not complete, - * * 1 if HuC firmware is loaded and fully authenticated, - * * 2 if HuC firmware is loaded and authenticated for clear media only - */ -#define I915_PARAM_HUC_STATUS 42 - -/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of - * synchronisation with implicit fencing on individual objects. - * See EXEC_OBJECT_ASYNC. - */ -#define I915_PARAM_HAS_EXEC_ASYNC 43 - -/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support - - * both being able to pass in a sync_file fd to wait upon before executing, - * and being able to return a new sync_file fd that is signaled when the - * current request is complete. See I915_EXEC_FENCE_IN and I915_EXEC_FENCE_OUT. - */ -#define I915_PARAM_HAS_EXEC_FENCE 44 - -/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture - * user-specified buffers for post-mortem debugging of GPU hangs. See - * EXEC_OBJECT_CAPTURE. - */ -#define I915_PARAM_HAS_EXEC_CAPTURE 45 - -#define I915_PARAM_SLICE_MASK 46 - -/* Assuming it's uniform for each slice, this queries the mask of subslices - * per-slice for this system. - */ -#define I915_PARAM_SUBSLICE_MASK 47 - -/* - * Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying the batch buffer - * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST. - */ -#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 - -/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of - * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY. - */ -#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 - -/* - * Query whether every context (both per-file default and user created) is - * isolated (insofar as HW supports). If this parameter is not true, then - * freshly created contexts may inherit values from an existing context, - * rather than default HW values. If true, it also ensures (insofar as HW - * supports) that all state set by this context will not leak to any other - * context. - * - * As not every engine across every gen support contexts, the returned - * value reports the support of context isolation for individual engines by - * returning a bitmask of each engine class set to true if that class supports - * isolation. - */ -#define I915_PARAM_HAS_CONTEXT_ISOLATION 50 - -/* Frequency of the command streamer timestamps given by the *_TIMESTAMP - * registers. This used to be fixed per platform but from CNL onwards, this - * might vary depending on the parts. - */ -#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 - -/* - * Once upon a time we supposed that writes through the GGTT would be - * immediately in physical memory (once flushed out of the CPU path). However, - * on a few different processors and chipsets, this is not necessarily the case - * as the writes appear to be buffered internally. Thus a read of the backing - * storage (physical memory) via a different path (with different physical tags - * to the indirect write via the GGTT) will see stale values from before - * the GGTT write. Inside the kernel, we can for the most part keep track of - * the different read/write domains in use (e.g. set-domain), but the assumption - * of coherency is baked into the ABI, hence reporting its true state in this - * parameter. - * - * Reports true when writes via mmap_gtt are immediately visible following an - * lfence to flush the WCB. - * - * Reports false when writes via mmap_gtt are indeterminately delayed in an in - * internal buffer and are _not_ immediately visible to third parties accessing - * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC - * communications channel when reporting false is strongly disadvised. - */ -#define I915_PARAM_MMAP_GTT_COHERENT 52 - -/* - * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel - * execution through use of explicit fence support. - * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT. - */ -#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53 - -/* - * Revision of the i915-perf uAPI. The value returned helps determine what - * i915-perf features are available. See drm_i915_perf_property_id. - */ -#define I915_PARAM_PERF_REVISION 54 - -/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of - * timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See - * I915_EXEC_USE_EXTENSIONS. - */ -#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 - -/* Query if the kernel supports the I915_USERPTR_PROBE flag. */ -#define I915_PARAM_HAS_USERPTR_PROBE 56 - -/* - * Frequency of the timestamps in OA reports. This used to be the same as the CS - * timestamp frequency, but differs on some platforms. - */ -#define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57 - -/* - * Query the status of PXP support in i915. - * - * The query can fail in the following scenarios with the listed error codes: - * -ENODEV = PXP support is not available on the GPU device or in the - * kernel due to missing component drivers or kernel configs. - * - * If the IOCTL is successful, the returned parameter will be set to one of - * the following values: - * 1 = PXP feature is supported and is ready for use. - * 2 = PXP feature is supported but should be ready soon (pending - * initialization of non-i915 system dependencies). - * - * NOTE: When param is supported (positive return values), user space should - * still refer to the GEM PXP context-creation UAPI header specs to be - * aware of possible failure due to system state machine at the time. - */ -#define I915_PARAM_PXP_STATUS 58 - -/* - * Query if kernel allows marking a context to send a Freq hint to SLPC. This - * will enable use of the strategies allowed by the SLPC algorithm. - */ -#define I915_PARAM_HAS_CONTEXT_FREQ_HINT 59 - -/* Must be kept compact -- no holes and well documented */ - -/** - * struct drm_i915_getparam - Driver parameter query structure. - */ -struct drm_i915_getparam { - /** @param: Driver parameter to query. */ - __s32 param; - - /** - * @value: Address of memory where queried value should be put. - * - * WARNING: Using pointers instead of fixed-size u64 means we need to write - * compat32 code. Don't repeat this mistake. - */ - int __user *value; -}; - -/** - * typedef drm_i915_getparam_t - Driver parameter query structure. - * See struct drm_i915_getparam. - */ -typedef struct drm_i915_getparam drm_i915_getparam_t; - -/* Ioctl to set kernel params: - */ -#define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1 -#define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 -#define I915_SETPARAM_ALLOW_BATCHBUFFER 3 -#define I915_SETPARAM_NUM_USED_FENCES 4 -/* Must be kept compact -- no holes */ - -typedef struct drm_i915_setparam { - int param; - int value; -} drm_i915_setparam_t; - -/* A memory manager for regions of shared memory: - */ -#define I915_MEM_REGION_AGP 1 - -typedef struct drm_i915_mem_alloc { - int region; - int alignment; - int size; - int __user *region_offset; /* offset from start of fb or agp */ -} drm_i915_mem_alloc_t; - -typedef struct drm_i915_mem_free { - int region; - int region_offset; -} drm_i915_mem_free_t; - -typedef struct drm_i915_mem_init_heap { - int region; - int size; - int start; -} drm_i915_mem_init_heap_t; - -/* Allow memory manager to be torn down and re-initialized (eg on - * rotate): - */ -typedef struct drm_i915_mem_destroy_heap { - int region; -} drm_i915_mem_destroy_heap_t; - -/* Allow X server to configure which pipes to monitor for vblank signals - */ -#define DRM_I915_VBLANK_PIPE_A 1 -#define DRM_I915_VBLANK_PIPE_B 2 - -typedef struct drm_i915_vblank_pipe { - int pipe; -} drm_i915_vblank_pipe_t; - -/* Schedule buffer swap at given vertical blank: - */ -typedef struct drm_i915_vblank_swap { - drm_drawable_t drawable; - enum drm_vblank_seq_type seqtype; - unsigned int sequence; -} drm_i915_vblank_swap_t; - -typedef struct drm_i915_hws_addr { - __u64 addr; -} drm_i915_hws_addr_t; - -struct drm_i915_gem_init { - /** - * Beginning offset in the GTT to be managed by the DRM memory - * manager. - */ - __u64 gtt_start; - /** - * Ending offset in the GTT to be managed by the DRM memory - * manager. - */ - __u64 gtt_end; -}; - -struct drm_i915_gem_create { - /** - * Requested size for the object. - * - * The (page-aligned) allocated size for the object will be returned. - */ - __u64 size; - /** - * Returned handle for the object. - * - * Object handles are nonzero. - */ - __u32 handle; - __u32 pad; -}; - -struct drm_i915_gem_pread { - /** Handle for the object being read. */ - __u32 handle; - __u32 pad; - /** Offset into the object to read from */ - __u64 offset; - /** Length of data to read */ - __u64 size; - /** - * Pointer to write the data into. - * - * This is a fixed-size type for 32/64 compatibility. - */ - __u64 data_ptr; -}; - -struct drm_i915_gem_pwrite { - /** Handle for the object being written to. */ - __u32 handle; - __u32 pad; - /** Offset into the object to write to */ - __u64 offset; - /** Length of data to write */ - __u64 size; - /** - * Pointer to read the data from. - * - * This is a fixed-size type for 32/64 compatibility. - */ - __u64 data_ptr; -}; - -struct drm_i915_gem_mmap { - /** Handle for the object being mapped. */ - __u32 handle; - __u32 pad; - /** Offset in the object to map. */ - __u64 offset; - /** - * Length of data to map. - * - * The value will be page-aligned. - */ - __u64 size; - /** - * Returned pointer the data was mapped at. - * - * This is a fixed-size type for 32/64 compatibility. - */ - __u64 addr_ptr; - - /** - * Flags for extended behaviour. - * - * Added in version 2. - */ - __u64 flags; -#define I915_MMAP_WC 0x1 -}; - -struct drm_i915_gem_mmap_gtt { - /** Handle for the object being mapped. */ - __u32 handle; - __u32 pad; - /** - * Fake offset to use for subsequent mmap call - * - * This is a fixed-size type for 32/64 compatibility. - */ - __u64 offset; -}; - -/** - * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object. - * - * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl, - * and is used to retrieve the fake offset to mmap an object specified by &handle. - * - * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+. - * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave - * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`. - */ -struct drm_i915_gem_mmap_offset { - /** @handle: Handle for the object being mapped. */ - __u32 handle; - /** @pad: Must be zero */ - __u32 pad; - /** - * @offset: The fake offset to use for subsequent mmap call - * - * This is a fixed-size type for 32/64 compatibility. - */ - __u64 offset; - - /** - * @flags: Flags for extended behaviour. - * - * It is mandatory that one of the `MMAP_OFFSET` types - * should be included: - * - * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined) - * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching. - * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching. - * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching. - * - * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid - * type. On devices without local memory, this caching mode is invalid. - * - * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will - * be used, depending on the object placement on creation. WB will be used - * when the object can only exist in system memory, WC otherwise. - */ - __u64 flags; - -#define I915_MMAP_OFFSET_GTT 0 -#define I915_MMAP_OFFSET_WC 1 -#define I915_MMAP_OFFSET_WB 2 -#define I915_MMAP_OFFSET_UC 3 -#define I915_MMAP_OFFSET_FIXED 4 - - /** - * @extensions: Zero-terminated chain of extensions. - * - * No current extensions defined; mbz. - */ - __u64 extensions; -}; - -/** - * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in - * preparation for accessing the pages via some CPU domain. - * - * Specifying a new write or read domain will flush the object out of the - * previous domain(if required), before then updating the objects domain - * tracking with the new domain. - * - * Note this might involve waiting for the object first if it is still active on - * the GPU. - * - * Supported values for @read_domains and @write_domain: - * - * - I915_GEM_DOMAIN_WC: Uncached write-combined domain - * - I915_GEM_DOMAIN_CPU: CPU cache domain - * - I915_GEM_DOMAIN_GTT: Mappable aperture domain - * - * All other domains are rejected. - * - * Note that for discrete, starting from DG1, this is no longer supported, and - * is instead rejected. On such platforms the CPU domain is effectively static, - * where we also only support a single &drm_i915_gem_mmap_offset cache mode, - * which can't be set explicitly and instead depends on the object placements, - * as per the below. - * - * Implicit caching rules, starting from DG1: - * - * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) - * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and - * mapped as write-combined only. - * - * - Everything else is always allocated and mapped as write-back, with the - * guarantee that everything is also coherent with the GPU. - * - * Note that this is likely to change in the future again, where we might need - * more flexibility on future devices, so making this all explicit as part of a - * new &drm_i915_gem_create_ext extension is probable. - */ -struct drm_i915_gem_set_domain { - /** @handle: Handle for the object. */ - __u32 handle; - - /** @read_domains: New read domains. */ - __u32 read_domains; - - /** - * @write_domain: New write domain. - * - * Note that having something in the write domain implies it's in the - * read domain, and only that read domain. - */ - __u32 write_domain; -}; - -struct drm_i915_gem_sw_finish { - /** Handle for the object */ - __u32 handle; -}; - -struct drm_i915_gem_relocation_entry { - /** - * Handle of the buffer being pointed to by this relocation entry. - * - * It's appealing to make this be an index into the mm_validate_entry - * list to refer to the buffer, but this allows the driver to create - * a relocation list for state buffers and not re-write it per - * exec using the buffer. - */ - __u32 target_handle; - - /** - * Value to be added to the offset of the target buffer to make up - * the relocation entry. - */ - __u32 delta; - - /** Offset in the buffer the relocation entry will be written into */ - __u64 offset; - - /** - * Offset value of the target buffer that the relocation entry was last - * written as. - * - * If the buffer has the same offset as last time, we can skip syncing - * and writing the relocation. This value is written back out by - * the execbuffer ioctl when the relocation is written. - */ - __u64 presumed_offset; - - /** - * Target memory domains read by this operation. - */ - __u32 read_domains; - - /** - * Target memory domains written by this operation. - * - * Note that only one domain may be written by the whole - * execbuffer operation, so that where there are conflicts, - * the application will get -EINVAL back. - */ - __u32 write_domain; -}; - -/** @{ - * Intel memory domains - * - * Most of these just align with the various caches in - * the system and are used to flush and invalidate as - * objects end up cached in different domains. - */ -/** CPU cache */ -#define I915_GEM_DOMAIN_CPU 0x00000001 -/** Render cache, used by 2D and 3D drawing */ -#define I915_GEM_DOMAIN_RENDER 0x00000002 -/** Sampler cache, used by texture engine */ -#define I915_GEM_DOMAIN_SAMPLER 0x00000004 -/** Command queue, used to load batch buffers */ -#define I915_GEM_DOMAIN_COMMAND 0x00000008 -/** Instruction cache, used by shader programs */ -#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010 -/** Vertex address cache */ -#define I915_GEM_DOMAIN_VERTEX 0x00000020 -/** GTT domain - aperture and scanout */ -#define I915_GEM_DOMAIN_GTT 0x00000040 -/** WC domain - uncached access */ -#define I915_GEM_DOMAIN_WC 0x00000080 -/** @} */ - -struct drm_i915_gem_exec_object { - /** - * User's handle for a buffer to be bound into the GTT for this - * operation. - */ - __u32 handle; - - /** Number of relocations to be performed on this buffer */ - __u32 relocation_count; - /** - * Pointer to array of struct drm_i915_gem_relocation_entry containing - * the relocations to be performed in this buffer. - */ - __u64 relocs_ptr; - - /** Required alignment in graphics aperture */ - __u64 alignment; - - /** - * Returned value of the updated offset of the object, for future - * presumed_offset writes. - */ - __u64 offset; -}; - -/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */ -struct drm_i915_gem_execbuffer { - /** - * List of buffers to be validated with their relocations to be - * performend on them. - * - * This is a pointer to an array of struct drm_i915_gem_validate_entry. - * - * These buffers must be listed in an order such that all relocations - * a buffer is performing refer to buffers that have already appeared - * in the validate list. - */ - __u64 buffers_ptr; - __u32 buffer_count; - - /** Offset in the batchbuffer to start execution from. */ - __u32 batch_start_offset; - /** Bytes used in batchbuffer from batch_start_offset */ - __u32 batch_len; - __u32 DR1; - __u32 DR4; - __u32 num_cliprects; - /** This is a struct drm_clip_rect *cliprects */ - __u64 cliprects_ptr; -}; - -struct drm_i915_gem_exec_object2 { - /** - * User's handle for a buffer to be bound into the GTT for this - * operation. - */ - __u32 handle; - - /** Number of relocations to be performed on this buffer */ - __u32 relocation_count; - /** - * Pointer to array of struct drm_i915_gem_relocation_entry containing - * the relocations to be performed in this buffer. - */ - __u64 relocs_ptr; - - /** Required alignment in graphics aperture */ - __u64 alignment; - - /** - * When the EXEC_OBJECT_PINNED flag is specified this is populated by - * the user with the GTT offset at which this object will be pinned. - * - * When the I915_EXEC_NO_RELOC flag is specified this must contain the - * presumed_offset of the object. - * - * During execbuffer2 the kernel populates it with the value of the - * current GTT offset of the object, for future presumed_offset writes. - * - * See struct drm_i915_gem_create_ext for the rules when dealing with - * alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with - * minimum page sizes, like DG2. - */ - __u64 offset; - -#define EXEC_OBJECT_NEEDS_FENCE (1<<0) -#define EXEC_OBJECT_NEEDS_GTT (1<<1) -#define EXEC_OBJECT_WRITE (1<<2) -#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) -#define EXEC_OBJECT_PINNED (1<<4) -#define EXEC_OBJECT_PAD_TO_SIZE (1<<5) -/* The kernel implicitly tracks GPU activity on all GEM objects, and - * synchronises operations with outstanding rendering. This includes - * rendering on other devices if exported via dma-buf. However, sometimes - * this tracking is too coarse and the user knows better. For example, - * if the object is split into non-overlapping ranges shared between different - * clients or engines (i.e. suballocating objects), the implicit tracking - * by kernel assumes that each operation affects the whole object rather - * than an individual range, causing needless synchronisation between clients. - * The kernel will also forgo any CPU cache flushes prior to rendering from - * the object as the client is expected to be also handling such domain - * tracking. - * - * The kernel maintains the implicit tracking in order to manage resources - * used by the GPU - this flag only disables the synchronisation prior to - * rendering with this object in this execbuf. - * - * Opting out of implicit synhronisation requires the user to do its own - * explicit tracking to avoid rendering corruption. See, for example, - * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously. - */ -#define EXEC_OBJECT_ASYNC (1<<6) -/* Request that the contents of this execobject be copied into the error - * state upon a GPU hang involving this batch for post-mortem debugging. - * These buffers are recorded in no particular order as "user" in - * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see - * if the kernel supports this flag. - */ -#define EXEC_OBJECT_CAPTURE (1<<7) -/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1) - __u64 flags; - - union { - __u64 rsvd1; - __u64 pad_to_size; - }; - __u64 rsvd2; -}; - -/** - * struct drm_i915_gem_exec_fence - An input or output fence for the execbuf - * ioctl. - * - * The request will wait for input fence to signal before submission. - * - * The returned output fence will be signaled after the completion of the - * request. - */ -struct drm_i915_gem_exec_fence { - /** @handle: User's handle for a drm_syncobj to wait on or signal. */ - __u32 handle; - - /** - * @flags: Supported flags are: - * - * I915_EXEC_FENCE_WAIT: - * Wait for the input fence before request submission. - * - * I915_EXEC_FENCE_SIGNAL: - * Return request completion fence as output - */ - __u32 flags; -#define I915_EXEC_FENCE_WAIT (1<<0) -#define I915_EXEC_FENCE_SIGNAL (1<<1) -#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) -}; - -/** - * struct drm_i915_gem_execbuffer_ext_timeline_fences - Timeline fences - * for execbuf ioctl. - * - * This structure describes an array of drm_syncobj and associated points for - * timeline variants of drm_syncobj. It is invalid to append this structure to - * the execbuf if I915_EXEC_FENCE_ARRAY is set. - */ -struct drm_i915_gem_execbuffer_ext_timeline_fences { -#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 - /** @base: Extension link. See struct i915_user_extension. */ - struct i915_user_extension base; - - /** - * @fence_count: Number of elements in the @handles_ptr & @value_ptr - * arrays. - */ - __u64 fence_count; - - /** - * @handles_ptr: Pointer to an array of struct drm_i915_gem_exec_fence - * of length @fence_count. - */ - __u64 handles_ptr; - - /** - * @values_ptr: Pointer to an array of u64 values of length - * @fence_count. - * Values must be 0 for a binary drm_syncobj. A Value of 0 for a - * timeline drm_syncobj is invalid as it turns a drm_syncobj into a - * binary one. - */ - __u64 values_ptr; -}; - -/** - * struct drm_i915_gem_execbuffer2 - Structure for DRM_I915_GEM_EXECBUFFER2 - * ioctl. - */ -struct drm_i915_gem_execbuffer2 { - /** @buffers_ptr: Pointer to a list of gem_exec_object2 structs */ - __u64 buffers_ptr; - - /** @buffer_count: Number of elements in @buffers_ptr array */ - __u32 buffer_count; - - /** - * @batch_start_offset: Offset in the batchbuffer to start execution - * from. - */ - __u32 batch_start_offset; - - /** - * @batch_len: Length in bytes of the batch buffer, starting from the - * @batch_start_offset. If 0, length is assumed to be the batch buffer - * object size. - */ - __u32 batch_len; - - /** @DR1: deprecated */ - __u32 DR1; - - /** @DR4: deprecated */ - __u32 DR4; - - /** @num_cliprects: See @cliprects_ptr */ - __u32 num_cliprects; - - /** - * @cliprects_ptr: Kernel clipping was a DRI1 misfeature. - * - * It is invalid to use this field if I915_EXEC_FENCE_ARRAY or - * I915_EXEC_USE_EXTENSIONS flags are not set. - * - * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array - * of &drm_i915_gem_exec_fence and @num_cliprects is the length of the - * array. - * - * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a - * single &i915_user_extension and num_cliprects is 0. - */ - __u64 cliprects_ptr; - - /** @flags: Execbuf flags */ - __u64 flags; -#define I915_EXEC_RING_MASK (0x3f) -#define I915_EXEC_DEFAULT (0<<0) -#define I915_EXEC_RENDER (1<<0) -#define I915_EXEC_BSD (2<<0) -#define I915_EXEC_BLT (3<<0) -#define I915_EXEC_VEBOX (4<<0) - -/* Used for switching the constants addressing mode on gen4+ RENDER ring. - * Gen6+ only supports relative addressing to dynamic state (default) and - * absolute addressing. - * - * These flags are ignored for the BSD and BLT rings. - */ -#define I915_EXEC_CONSTANTS_MASK (3<<6) -#define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */ -#define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6) -#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */ - -/** Resets the SO write offset registers for transform feedback on gen7. */ -#define I915_EXEC_GEN7_SOL_RESET (1<<8) - -/** Request a privileged ("secure") batch buffer. Note only available for - * DRM_ROOT_ONLY | DRM_MASTER processes. - */ -#define I915_EXEC_SECURE (1<<9) - -/** Inform the kernel that the batch is and will always be pinned. This - * negates the requirement for a workaround to be performed to avoid - * an incoherent CS (such as can be found on 830/845). If this flag is - * not passed, the kernel will endeavour to make sure the batch is - * coherent with the CS before execution. If this flag is passed, - * userspace assumes the responsibility for ensuring the same. - */ -#define I915_EXEC_IS_PINNED (1<<10) - -/** Provide a hint to the kernel that the command stream and auxiliary - * state buffers already holds the correct presumed addresses and so the - * relocation process may be skipped if no buffers need to be moved in - * preparation for the execbuffer. - */ -#define I915_EXEC_NO_RELOC (1<<11) - -/** Use the reloc.handle as an index into the exec object array rather - * than as the per-file handle. - */ -#define I915_EXEC_HANDLE_LUT (1<<12) - -/** Used for switching BSD rings on the platforms with two BSD rings */ -#define I915_EXEC_BSD_SHIFT (13) -#define I915_EXEC_BSD_MASK (3 << I915_EXEC_BSD_SHIFT) -/* default ping-pong mode */ -#define I915_EXEC_BSD_DEFAULT (0 << I915_EXEC_BSD_SHIFT) -#define I915_EXEC_BSD_RING1 (1 << I915_EXEC_BSD_SHIFT) -#define I915_EXEC_BSD_RING2 (2 << I915_EXEC_BSD_SHIFT) - -/** Tell the kernel that the batchbuffer is processed by - * the resource streamer. - */ -#define I915_EXEC_RESOURCE_STREAMER (1<<15) - -/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent - * a sync_file fd to wait upon (in a nonblocking manner) prior to executing - * the batch. - * - * Returns -EINVAL if the sync_file fd cannot be found. - */ -#define I915_EXEC_FENCE_IN (1<<16) - -/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd - * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given - * to the caller, and it should be close() after use. (The fd is a regular - * file descriptor and will be cleaned up on process termination. It holds - * a reference to the request, but nothing else.) - * - * The sync_file fd can be combined with other sync_file and passed either - * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip - * will only occur after this request completes), or to other devices. - * - * Using I915_EXEC_FENCE_OUT requires use of - * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written - * back to userspace. Failure to do so will cause the out-fence to always - * be reported as zero, and the real fence fd to be leaked. - */ -#define I915_EXEC_FENCE_OUT (1<<17) - -/* - * Traditionally the execbuf ioctl has only considered the final element in - * the execobject[] to be the executable batch. Often though, the client - * will known the batch object prior to construction and being able to place - * it into the execobject[] array first can simplify the relocation tracking. - * Setting I915_EXEC_BATCH_FIRST tells execbuf to use element 0 of the - * execobject[] as the * batch instead (the default is to use the last - * element). - */ -#define I915_EXEC_BATCH_FIRST (1<<18) - -/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr - * define an array of i915_gem_exec_fence structures which specify a set of - * dma fences to wait upon or signal. - */ -#define I915_EXEC_FENCE_ARRAY (1<<19) - -/* - * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent - * a sync_file fd to wait upon (in a nonblocking manner) prior to executing - * the batch. - * - * Returns -EINVAL if the sync_file fd cannot be found. - */ -#define I915_EXEC_FENCE_SUBMIT (1 << 20) - -/* - * Setting I915_EXEC_USE_EXTENSIONS implies that - * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked - * list of i915_user_extension. Each i915_user_extension node is the base of a - * larger structure. The list of supported structures are listed in the - * drm_i915_gem_execbuffer_ext enum. - */ -#define I915_EXEC_USE_EXTENSIONS (1 << 21) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1)) - - /** @rsvd1: Context id */ - __u64 rsvd1; - - /** - * @rsvd2: in and out sync_file file descriptors. - * - * When I915_EXEC_FENCE_IN or I915_EXEC_FENCE_SUBMIT flag is set, the - * lower 32 bits of this field will have the in sync_file fd (input). - * - * When I915_EXEC_FENCE_OUT flag is set, the upper 32 bits of this - * field will have the out sync_file fd (output). - */ - __u64 rsvd2; -}; - -#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) -#define i915_execbuffer2_set_context_id(eb2, context) \ - (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK -#define i915_execbuffer2_get_context_id(eb2) \ - ((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK) - -struct drm_i915_gem_pin { - /** Handle of the buffer to be pinned. */ - __u32 handle; - __u32 pad; - - /** alignment required within the aperture */ - __u64 alignment; - - /** Returned GTT offset of the buffer. */ - __u64 offset; -}; - -struct drm_i915_gem_unpin { - /** Handle of the buffer to be unpinned. */ - __u32 handle; - __u32 pad; -}; - -struct drm_i915_gem_busy { - /** Handle of the buffer to check for busy */ - __u32 handle; - - /** Return busy status - * - * A return of 0 implies that the object is idle (after - * having flushed any pending activity), and a non-zero return that - * the object is still in-flight on the GPU. (The GPU has not yet - * signaled completion for all pending requests that reference the - * object.) An object is guaranteed to become idle eventually (so - * long as no new GPU commands are executed upon it). Due to the - * asynchronous nature of the hardware, an object reported - * as busy may become idle before the ioctl is completed. - * - * Furthermore, if the object is busy, which engine is busy is only - * provided as a guide and only indirectly by reporting its class - * (there may be more than one engine in each class). There are race - * conditions which prevent the report of which engines are busy from - * being always accurate. However, the converse is not true. If the - * object is idle, the result of the ioctl, that all engines are idle, - * is accurate. - * - * The returned dword is split into two fields to indicate both - * the engine classes on which the object is being read, and the - * engine class on which it is currently being written (if any). - * - * The low word (bits 0:15) indicate if the object is being written - * to by any engine (there can only be one, as the GEM implicit - * synchronisation rules force writes to be serialised). Only the - * engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as - * 1 not 0 etc) for the last write is reported. - * - * The high word (bits 16:31) are a bitmask of which engines classes - * are currently reading from the object. Multiple engines may be - * reading from the object simultaneously. - * - * The value of each engine class is the same as specified in the - * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e. - * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. - * Some hardware may have parallel execution engines, e.g. multiple - * media engines, which are mapped to the same class identifier and so - * are not separately reported for busyness. - * - * Caveat emptor: - * Only the boolean result of this query is reliable; that is whether - * the object is idle or busy. The report of which engines are busy - * should be only used as a heuristic. - */ - __u32 busy; -}; - -/** - * struct drm_i915_gem_caching - Set or get the caching for given object - * handle. - * - * Allow userspace to control the GTT caching bits for a given object when the - * object is later mapped through the ppGTT(or GGTT on older platforms lacking - * ppGTT support, or if the object is used for scanout). Note that this might - * require unbinding the object from the GTT first, if its current caching value - * doesn't match. - * - * Note that this all changes on discrete platforms, starting from DG1, the - * set/get caching is no longer supported, and is now rejected. Instead the CPU - * caching attributes(WB vs WC) will become an immutable creation time property - * for the object, along with the GTT caching level. For now we don't expose any - * new uAPI for this, instead on DG1 this is all implicit, although this largely - * shouldn't matter since DG1 is coherent by default(without any way of - * controlling it). - * - * Implicit caching rules, starting from DG1: - * - * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) - * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and - * mapped as write-combined only. - * - * - Everything else is always allocated and mapped as write-back, with the - * guarantee that everything is also coherent with the GPU. - * - * Note that this is likely to change in the future again, where we might need - * more flexibility on future devices, so making this all explicit as part of a - * new &drm_i915_gem_create_ext extension is probable. - * - * Side note: Part of the reason for this is that changing the at-allocation-time CPU - * caching attributes for the pages might be required(and is expensive) if we - * need to then CPU map the pages later with different caching attributes. This - * inconsistent caching behaviour, while supported on x86, is not universally - * supported on other architectures. So for simplicity we opt for setting - * everything at creation time, whilst also making it immutable, on discrete - * platforms. - */ -struct drm_i915_gem_caching { - /** - * @handle: Handle of the buffer to set/get the caching level. - */ - __u32 handle; - - /** - * @caching: The GTT caching level to apply or possible return value. - * - * The supported @caching values: - * - * I915_CACHING_NONE: - * - * GPU access is not coherent with CPU caches. Default for machines - * without an LLC. This means manual flushing might be needed, if we - * want GPU access to be coherent. - * - * I915_CACHING_CACHED: - * - * GPU access is coherent with CPU caches and furthermore the data is - * cached in last-level caches shared between CPU cores and the GPU GT. - * - * I915_CACHING_DISPLAY: - * - * Special GPU caching mode which is coherent with the scanout engines. - * Transparently falls back to I915_CACHING_NONE on platforms where no - * special cache mode (like write-through or gfdt flushing) is - * available. The kernel automatically sets this mode when using a - * buffer as a scanout target. Userspace can manually set this mode to - * avoid a costly stall and clflush in the hotpath of drawing the first - * frame. - */ -#define I915_CACHING_NONE 0 -#define I915_CACHING_CACHED 1 -#define I915_CACHING_DISPLAY 2 - __u32 caching; -}; - -#define I915_TILING_NONE 0 -#define I915_TILING_X 1 -#define I915_TILING_Y 2 -/* - * Do not add new tiling types here. The I915_TILING_* values are for - * de-tiling fence registers that no longer exist on modern platforms. Although - * the hardware may support new types of tiling in general (e.g., Tile4), we - * do not need to add them to the uapi that is specific to now-defunct ioctls. - */ -#define I915_TILING_LAST I915_TILING_Y - -#define I915_BIT_6_SWIZZLE_NONE 0 -#define I915_BIT_6_SWIZZLE_9 1 -#define I915_BIT_6_SWIZZLE_9_10 2 -#define I915_BIT_6_SWIZZLE_9_11 3 -#define I915_BIT_6_SWIZZLE_9_10_11 4 -/* Not seen by userland */ -#define I915_BIT_6_SWIZZLE_UNKNOWN 5 -/* Seen by userland. */ -#define I915_BIT_6_SWIZZLE_9_17 6 -#define I915_BIT_6_SWIZZLE_9_10_17 7 - -struct drm_i915_gem_set_tiling { - /** Handle of the buffer to have its tiling state updated */ - __u32 handle; - - /** - * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X, - * I915_TILING_Y). - * - * This value is to be set on request, and will be updated by the - * kernel on successful return with the actual chosen tiling layout. - * - * The tiling mode may be demoted to I915_TILING_NONE when the system - * has bit 6 swizzling that can't be managed correctly by GEM. - * - * Buffer contents become undefined when changing tiling_mode. - */ - __u32 tiling_mode; - - /** - * Stride in bytes for the object when in I915_TILING_X or - * I915_TILING_Y. - */ - __u32 stride; - - /** - * Returned address bit 6 swizzling required for CPU access through - * mmap mapping. - */ - __u32 swizzle_mode; -}; - -struct drm_i915_gem_get_tiling { - /** Handle of the buffer to get tiling state for. */ - __u32 handle; - - /** - * Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X, - * I915_TILING_Y). - */ - __u32 tiling_mode; - - /** - * Returned address bit 6 swizzling required for CPU access through - * mmap mapping. - */ - __u32 swizzle_mode; - - /** - * Returned address bit 6 swizzling required for CPU access through - * mmap mapping whilst bound. - */ - __u32 phys_swizzle_mode; -}; - -struct drm_i915_gem_get_aperture { - /** Total size of the aperture used by i915_gem_execbuffer, in bytes */ - __u64 aper_size; - - /** - * Available space in the aperture used by i915_gem_execbuffer, in - * bytes - */ - __u64 aper_available_size; -}; - -struct drm_i915_get_pipe_from_crtc_id { - /** ID of CRTC being requested **/ - __u32 crtc_id; - - /** pipe of requested CRTC **/ - __u32 pipe; -}; - -#define I915_MADV_WILLNEED 0 -#define I915_MADV_DONTNEED 1 -#define __I915_MADV_PURGED 2 /* internal state */ - -struct drm_i915_gem_madvise { - /** Handle of the buffer to change the backing store advice */ - __u32 handle; - - /* Advice: either the buffer will be needed again in the near future, - * or won't be and could be discarded under memory pressure. - */ - __u32 madv; - - /** Whether the backing store still exists. */ - __u32 retained; -}; - -/* flags */ -#define I915_OVERLAY_TYPE_MASK 0xff -#define I915_OVERLAY_YUV_PLANAR 0x01 -#define I915_OVERLAY_YUV_PACKED 0x02 -#define I915_OVERLAY_RGB 0x03 - -#define I915_OVERLAY_DEPTH_MASK 0xff00 -#define I915_OVERLAY_RGB24 0x1000 -#define I915_OVERLAY_RGB16 0x2000 -#define I915_OVERLAY_RGB15 0x3000 -#define I915_OVERLAY_YUV422 0x0100 -#define I915_OVERLAY_YUV411 0x0200 -#define I915_OVERLAY_YUV420 0x0300 -#define I915_OVERLAY_YUV410 0x0400 - -#define I915_OVERLAY_SWAP_MASK 0xff0000 -#define I915_OVERLAY_NO_SWAP 0x000000 -#define I915_OVERLAY_UV_SWAP 0x010000 -#define I915_OVERLAY_Y_SWAP 0x020000 -#define I915_OVERLAY_Y_AND_UV_SWAP 0x030000 - -#define I915_OVERLAY_FLAGS_MASK 0xff000000 -#define I915_OVERLAY_ENABLE 0x01000000 - -struct drm_intel_overlay_put_image { - /* various flags and src format description */ - __u32 flags; - /* source picture description */ - __u32 bo_handle; - /* stride values and offsets are in bytes, buffer relative */ - __u16 stride_Y; /* stride for packed formats */ - __u16 stride_UV; - __u32 offset_Y; /* offset for packet formats */ - __u32 offset_U; - __u32 offset_V; - /* in pixels */ - __u16 src_width; - __u16 src_height; - /* to compensate the scaling factors for partially covered surfaces */ - __u16 src_scan_width; - __u16 src_scan_height; - /* output crtc description */ - __u32 crtc_id; - __u16 dst_x; - __u16 dst_y; - __u16 dst_width; - __u16 dst_height; -}; - -/* flags */ -#define I915_OVERLAY_UPDATE_ATTRS (1<<0) -#define I915_OVERLAY_UPDATE_GAMMA (1<<1) -#define I915_OVERLAY_DISABLE_DEST_COLORKEY (1<<2) -struct drm_intel_overlay_attrs { - __u32 flags; - __u32 color_key; - __s32 brightness; - __u32 contrast; - __u32 saturation; - __u32 gamma0; - __u32 gamma1; - __u32 gamma2; - __u32 gamma3; - __u32 gamma4; - __u32 gamma5; -}; - -/* - * Intel sprite handling - * - * Color keying works with a min/mask/max tuple. Both source and destination - * color keying is allowed. - * - * Source keying: - * Sprite pixels within the min & max values, masked against the color channels - * specified in the mask field, will be transparent. All other pixels will - * be displayed on top of the primary plane. For RGB surfaces, only the min - * and mask fields will be used; ranged compares are not allowed. - * - * Destination keying: - * Primary plane pixels that match the min value, masked against the color - * channels specified in the mask field, will be replaced by corresponding - * pixels from the sprite plane. - * - * Note that source & destination keying are exclusive; only one can be - * active on a given plane. - */ - -#define I915_SET_COLORKEY_NONE (1<<0) /* Deprecated. Instead set - * flags==0 to disable colorkeying. - */ -#define I915_SET_COLORKEY_DESTINATION (1<<1) -#define I915_SET_COLORKEY_SOURCE (1<<2) -struct drm_intel_sprite_colorkey { - __u32 plane_id; - __u32 min_value; - __u32 channel_mask; - __u32 max_value; - __u32 flags; -}; - -struct drm_i915_gem_wait { - /** Handle of BO we shall wait on */ - __u32 bo_handle; - __u32 flags; - /** Number of nanoseconds to wait, Returns time remaining. */ - __s64 timeout_ns; -}; - -struct drm_i915_gem_context_create { - __u32 ctx_id; /* output: id of new context*/ - __u32 pad; -}; - -/** - * struct drm_i915_gem_context_create_ext - Structure for creating contexts. - */ -struct drm_i915_gem_context_create_ext { - /** @ctx_id: Id of the created context (output) */ - __u32 ctx_id; - - /** - * @flags: Supported flags are: - * - * I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS: - * - * Extensions may be appended to this structure and driver must check - * for those. See @extensions. - * - * I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE - * - * Created context will have single timeline. - */ - __u32 flags; -#define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0) -#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1) -#define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \ - (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1)) - - /** - * @extensions: Zero-terminated chain of extensions. - * - * I915_CONTEXT_CREATE_EXT_SETPARAM: - * Context parameter to set or query during context creation. - * See struct drm_i915_gem_context_create_ext_setparam. - * - * I915_CONTEXT_CREATE_EXT_CLONE: - * This extension has been removed. On the off chance someone somewhere - * has attempted to use it, never re-use this extension number. - */ - __u64 extensions; -#define I915_CONTEXT_CREATE_EXT_SETPARAM 0 -#define I915_CONTEXT_CREATE_EXT_CLONE 1 -}; - -/** - * struct drm_i915_gem_context_param - Context parameter to set or query. - */ -struct drm_i915_gem_context_param { - /** @ctx_id: Context id */ - __u32 ctx_id; - - /** @size: Size of the parameter @value */ - __u32 size; - - /** @param: Parameter to set or query */ - __u64 param; -#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 -/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance - * someone somewhere has attempted to use it, never re-use this context - * param number. - */ -#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 -#define I915_CONTEXT_PARAM_GTT_SIZE 0x3 -#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 -#define I915_CONTEXT_PARAM_BANNABLE 0x5 -#define I915_CONTEXT_PARAM_PRIORITY 0x6 -#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ -#define I915_CONTEXT_DEFAULT_PRIORITY 0 -#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ - /* - * When using the following param, value should be a pointer to - * drm_i915_gem_context_param_sseu. - */ -#define I915_CONTEXT_PARAM_SSEU 0x7 - -/* - * Not all clients may want to attempt automatic recover of a context after - * a hang (for example, some clients may only submit very small incremental - * batches relying on known logical state of previous batches which will never - * recover correctly and each attempt will hang), and so would prefer that - * the context is forever banned instead. - * - * If set to false (0), after a reset, subsequent (and in flight) rendering - * from this context is discarded, and the client will need to create a new - * context to use instead. - * - * If set to true (1), the kernel will automatically attempt to recover the - * context by skipping the hanging batch and executing the next batch starting - * from the default context state (discarding the incomplete logical context - * state lost due to the reset). - * - * On creation, all new contexts are marked as recoverable. - */ -#define I915_CONTEXT_PARAM_RECOVERABLE 0x8 - - /* - * The id of the associated virtual memory address space (ppGTT) of - * this context. Can be retrieved and passed to another context - * (on the same fd) for both to use the same ppGTT and so share - * address layouts, and avoid reloading the page tables on context - * switches between themselves. - * - * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY. - */ -#define I915_CONTEXT_PARAM_VM 0x9 - -/* - * I915_CONTEXT_PARAM_ENGINES: - * - * Bind this context to operate on this subset of available engines. Henceforth, - * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as - * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0] - * and upwards. Slots 0...N are filled in using the specified (class, instance). - * Use - * engine_class: I915_ENGINE_CLASS_INVALID, - * engine_instance: I915_ENGINE_CLASS_INVALID_NONE - * to specify a gap in the array that can be filled in later, e.g. by a - * virtual engine used for load balancing. - * - * Setting the number of engines bound to the context to 0, by passing a zero - * sized argument, will revert back to default settings. - * - * See struct i915_context_param_engines. - * - * Extensions: - * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE) - * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) - * i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT) - */ -#define I915_CONTEXT_PARAM_ENGINES 0xa - -/* - * I915_CONTEXT_PARAM_PERSISTENCE: - * - * Allow the context and active rendering to survive the process until - * completion. Persistence allows fire-and-forget clients to queue up a - * bunch of work, hand the output over to a display server and then quit. - * If the context is marked as not persistent, upon closing (either via - * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure - * or process termination), the context and any outstanding requests will be - * cancelled (and exported fences for cancelled requests marked as -EIO). - * - * By default, new contexts allow persistence. - */ -#define I915_CONTEXT_PARAM_PERSISTENCE 0xb - -/* This API has been removed. On the off chance someone somewhere has - * attempted to use it, never re-use this context param number. - */ -#define I915_CONTEXT_PARAM_RINGSIZE 0xc - -/* - * I915_CONTEXT_PARAM_PROTECTED_CONTENT: - * - * Mark that the context makes use of protected content, which will result - * in the context being invalidated when the protected content session is. - * Given that the protected content session is killed on suspend, the device - * is kept awake for the lifetime of a protected context, so the user should - * make sure to dispose of them once done. - * This flag can only be set at context creation time and, when set to true, - * must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE - * to false. This flag can't be set to true in conjunction with setting the - * I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example: - * - * .. code-block:: C - * - * struct drm_i915_gem_context_create_ext_setparam p_protected = { - * .base = { - * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, - * }, - * .param = { - * .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT, - * .value = 1, - * } - * }; - * struct drm_i915_gem_context_create_ext_setparam p_norecover = { - * .base = { - * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, - * .next_extension = to_user_pointer(&p_protected), - * }, - * .param = { - * .param = I915_CONTEXT_PARAM_RECOVERABLE, - * .value = 0, - * } - * }; - * struct drm_i915_gem_context_create_ext create = { - * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, - * .extensions = to_user_pointer(&p_norecover); - * }; - * - * ctx_id = gem_context_create_ext(drm_fd, &create); - * - * In addition to the normal failure cases, setting this flag during context - * creation can result in the following errors: - * - * -ENODEV: feature not available - * -EPERM: trying to mark a recoverable or not bannable context as protected - * -ENXIO: A dependency such as a component driver or firmware is not yet - * loaded so user space may need to attempt again. Depending on the - * device, this error may be reported if protected context creation is - * attempted very early after kernel start because the internal timeout - * waiting for such dependencies is not guaranteed to be larger than - * required (numbers differ depending on system and kernel config): - * - ADL/RPL: dependencies may take up to 3 seconds from kernel start - * while context creation internal timeout is 250 milisecs - * - MTL: dependencies may take up to 8 seconds from kernel start - * while context creation internal timeout is 250 milisecs - * NOTE: such dependencies happen once, so a subsequent call to create a - * protected context after a prior successful call will not experience - * such timeouts and will not return -ENXIO (unless the driver is reloaded, - * or, depending on the device, resumes from a suspended state). - * -EIO: The firmware did not succeed in creating the protected context. - */ -#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd - -/* - * I915_CONTEXT_PARAM_LOW_LATENCY: - * - * Mark this context as a low latency workload which requires aggressive GT - * frequency scaling. Use I915_PARAM_HAS_CONTEXT_FREQ_HINT to check if the kernel - * supports this per context flag. - */ -#define I915_CONTEXT_PARAM_LOW_LATENCY 0xe - -/* - * I915_CONTEXT_PARAM_CONTEXT_IMAGE: - * - * Allows userspace to provide own context images. - * - * Note that this is a debug API not available on production kernel builds. - */ -#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf -/* Must be kept compact -- no holes and well documented */ - - /** @value: Context parameter value to be set or queried */ - __u64 value; -}; - -/* - * Context SSEU programming - * - * It may be necessary for either functional or performance reason to configure - * a context to run with a reduced number of SSEU (where SSEU stands for Slice/ - * Sub-slice/EU). - * - * This is done by configuring SSEU configuration using the below - * @struct drm_i915_gem_context_param_sseu for every supported engine which - * userspace intends to use. - * - * Not all GPUs or engines support this functionality in which case an error - * code -ENODEV will be returned. - * - * Also, flexibility of possible SSEU configuration permutations varies between - * GPU generations and software imposed limitations. Requesting such a - * combination will return an error code of -EINVAL. - * - * NOTE: When perf/OA is active the context's SSEU configuration is ignored in - * favour of a single global setting. - */ -struct drm_i915_gem_context_param_sseu { - /* - * Engine class & instance to be configured or queried. - */ - struct i915_engine_class_instance engine; - - /* - * Unknown flags must be cleared to zero. - */ - __u32 flags; -#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0) - - /* - * Mask of slices to enable for the context. Valid values are a subset - * of the bitmask value returned for I915_PARAM_SLICE_MASK. - */ - __u64 slice_mask; - - /* - * Mask of subslices to enable for the context. Valid values are a - * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK. - */ - __u64 subslice_mask; - - /* - * Minimum/Maximum number of EUs to enable per subslice for the - * context. min_eus_per_subslice must be inferior or equal to - * max_eus_per_subslice. - */ - __u16 min_eus_per_subslice; - __u16 max_eus_per_subslice; - - /* - * Unused for now. Must be cleared to zero. - */ - __u32 rsvd; -}; - -/** - * DOC: Virtual Engine uAPI - * - * Virtual engine is a concept where userspace is able to configure a set of - * physical engines, submit a batch buffer, and let the driver execute it on any - * engine from the set as it sees fit. - * - * This is primarily useful on parts which have multiple instances of a same - * class engine, like for example GT3+ Skylake parts with their two VCS engines. - * - * For instance userspace can enumerate all engines of a certain class using the - * previously described `Engine Discovery uAPI`_. After that userspace can - * create a GEM context with a placeholder slot for the virtual engine (using - * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class - * and instance respectively) and finally using the - * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in - * the same reserved slot. - * - * Example of creating a virtual engine and submitting a batch buffer to it: - * - * .. code-block:: C - * - * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = { - * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE, - * .engine_index = 0, // Place this virtual engine into engine map slot 0 - * .num_siblings = 2, - * .engines = { { I915_ENGINE_CLASS_VIDEO, 0 }, - * { I915_ENGINE_CLASS_VIDEO, 1 }, }, - * }; - * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = { - * .engines = { { I915_ENGINE_CLASS_INVALID, - * I915_ENGINE_CLASS_INVALID_NONE } }, - * .extensions = to_user_pointer(&virtual), // Chains after load_balance extension - * }; - * struct drm_i915_gem_context_create_ext_setparam p_engines = { - * .base = { - * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, - * }, - * .param = { - * .param = I915_CONTEXT_PARAM_ENGINES, - * .value = to_user_pointer(&engines), - * .size = sizeof(engines), - * }, - * }; - * struct drm_i915_gem_context_create_ext create = { - * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, - * .extensions = to_user_pointer(&p_engines); - * }; - * - * ctx_id = gem_context_create_ext(drm_fd, &create); - * - * // Now we have created a GEM context with its engine map containing a - * // single virtual engine. Submissions to this slot can go either to - * // vcs0 or vcs1, depending on the load balancing algorithm used inside - * // the driver. The load balancing is dynamic from one batch buffer to - * // another and transparent to userspace. - * - * ... - * execbuf.rsvd1 = ctx_id; - * execbuf.flags = 0; // Submits to index 0 which is the virtual engine - * gem_execbuf(drm_fd, &execbuf); - */ - -/* - * i915_context_engines_load_balance: - * - * Enable load balancing across this set of engines. - * - * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when - * used will proxy the execbuffer request onto one of the set of engines - * in such a way as to distribute the load evenly across the set. - * - * The set of engines must be compatible (e.g. the same HW class) as they - * will share the same logical GPU context and ring. - * - * To intermix rendering with the virtual engine and direct rendering onto - * the backing engines (bypassing the load balancing proxy), the context must - * be defined to use a single timeline for all engines. - */ -struct i915_context_engines_load_balance { - struct i915_user_extension base; - - __u16 engine_index; - __u16 num_siblings; - __u32 flags; /* all undefined flags must be zero */ - - __u64 mbz64; /* reserved for future use; must be zero */ - - struct i915_engine_class_instance engines[]; -} __attribute__((packed)); - -#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \ - struct i915_user_extension base; \ - __u16 engine_index; \ - __u16 num_siblings; \ - __u32 flags; \ - __u64 mbz64; \ - struct i915_engine_class_instance engines[N__]; \ -} __attribute__((packed)) name__ - -/* - * i915_context_engines_bond: - * - * Constructed bonded pairs for execution within a virtual engine. - * - * All engines are equal, but some are more equal than others. Given - * the distribution of resources in the HW, it may be preferable to run - * a request on a given subset of engines in parallel to a request on a - * specific engine. We enable this selection of engines within a virtual - * engine by specifying bonding pairs, for any given master engine we will - * only execute on one of the corresponding siblings within the virtual engine. - * - * To execute a request in parallel on the master engine and a sibling requires - * coordination with a I915_EXEC_FENCE_SUBMIT. - */ -struct i915_context_engines_bond { - struct i915_user_extension base; - - struct i915_engine_class_instance master; - - __u16 virtual_index; /* index of virtual engine in ctx->engines[] */ - __u16 num_bonds; - - __u64 flags; /* all undefined flags must be zero */ - __u64 mbz64[4]; /* reserved for future use; must be zero */ - - struct i915_engine_class_instance engines[]; -} __attribute__((packed)); - -#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \ - struct i915_user_extension base; \ - struct i915_engine_class_instance master; \ - __u16 virtual_index; \ - __u16 num_bonds; \ - __u64 flags; \ - __u64 mbz64[4]; \ - struct i915_engine_class_instance engines[N__]; \ -} __attribute__((packed)) name__ - -/** - * struct i915_context_engines_parallel_submit - Configure engine for - * parallel submission. - * - * Setup a slot in the context engine map to allow multiple BBs to be submitted - * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU - * in parallel. Multiple hardware contexts are created internally in the i915 to - * run these BBs. Once a slot is configured for N BBs only N BBs can be - * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user - * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how - * many BBs there are based on the slot's configuration. The N BBs are the last - * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set. - * - * The default placement behavior is to create implicit bonds between each - * context if each context maps to more than 1 physical engine (e.g. context is - * a virtual engine). Also we only allow contexts of same engine class and these - * contexts must be in logically contiguous order. Examples of the placement - * behavior are described below. Lastly, the default is to not allow BBs to be - * preempted mid-batch. Rather insert coordinated preemption points on all - * hardware contexts between each set of BBs. Flags could be added in the future - * to change both of these default behaviors. - * - * Returns -EINVAL if hardware context placement configuration is invalid or if - * the placement configuration isn't supported on the platform / submission - * interface. - * Returns -ENODEV if extension isn't supported on the platform / submission - * interface. - * - * .. code-block:: none - * - * Examples syntax: - * CS[X] = generic engine of same class, logical instance X - * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE - * - * Example 1 pseudo code: - * set_engines(INVALID) - * set_parallel(engine_index=0, width=2, num_siblings=1, - * engines=CS[0],CS[1]) - * - * Results in the following valid placement: - * CS[0], CS[1] - * - * Example 2 pseudo code: - * set_engines(INVALID) - * set_parallel(engine_index=0, width=2, num_siblings=2, - * engines=CS[0],CS[2],CS[1],CS[3]) - * - * Results in the following valid placements: - * CS[0], CS[1] - * CS[2], CS[3] - * - * This can be thought of as two virtual engines, each containing two - * engines thereby making a 2D array. However, there are bonds tying the - * entries together and placing restrictions on how they can be scheduled. - * Specifically, the scheduler can choose only vertical columns from the 2D - * array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the - * scheduler wants to submit to CS[0], it must also choose CS[1] and vice - * versa. Same for CS[2] requires also using CS[3]. - * VE[0] = CS[0], CS[2] - * VE[1] = CS[1], CS[3] - * - * Example 3 pseudo code: - * set_engines(INVALID) - * set_parallel(engine_index=0, width=2, num_siblings=2, - * engines=CS[0],CS[1],CS[1],CS[3]) - * - * Results in the following valid and invalid placements: - * CS[0], CS[1] - * CS[1], CS[3] - Not logically contiguous, return -EINVAL - */ -struct i915_context_engines_parallel_submit { - /** - * @base: base user extension. - */ - struct i915_user_extension base; - - /** - * @engine_index: slot for parallel engine - */ - __u16 engine_index; - - /** - * @width: number of contexts per parallel engine or in other words the - * number of batches in each submission - */ - __u16 width; - - /** - * @num_siblings: number of siblings per context or in other words the - * number of possible placements for each submission - */ - __u16 num_siblings; - - /** - * @mbz16: reserved for future use; must be zero - */ - __u16 mbz16; - - /** - * @flags: all undefined flags must be zero, currently not defined flags - */ - __u64 flags; - - /** - * @mbz64: reserved for future use; must be zero - */ - __u64 mbz64[3]; - - /** - * @engines: 2-d array of engine instances to configure parallel engine - * - * length = width (i) * num_siblings (j) - * index = j + i * num_siblings - */ - struct i915_engine_class_instance engines[]; - -} __packed; - -#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \ - struct i915_user_extension base; \ - __u16 engine_index; \ - __u16 width; \ - __u16 num_siblings; \ - __u16 mbz16; \ - __u64 flags; \ - __u64 mbz64[3]; \ - struct i915_engine_class_instance engines[N__]; \ -} __attribute__((packed)) name__ - -/** - * DOC: Context Engine Map uAPI - * - * Context engine map is a new way of addressing engines when submitting batch- - * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT` - * inside the flags field of `struct drm_i915_gem_execbuffer2`. - * - * To use it created GEM contexts need to be configured with a list of engines - * the user is intending to submit to. This is accomplished using the - * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct - * i915_context_param_engines`. - * - * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the - * configured map. - * - * Example of creating such context and submitting against it: - * - * .. code-block:: C - * - * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = { - * .engines = { { I915_ENGINE_CLASS_RENDER, 0 }, - * { I915_ENGINE_CLASS_COPY, 0 } } - * }; - * struct drm_i915_gem_context_create_ext_setparam p_engines = { - * .base = { - * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, - * }, - * .param = { - * .param = I915_CONTEXT_PARAM_ENGINES, - * .value = to_user_pointer(&engines), - * .size = sizeof(engines), - * }, - * }; - * struct drm_i915_gem_context_create_ext create = { - * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, - * .extensions = to_user_pointer(&p_engines); - * }; - * - * ctx_id = gem_context_create_ext(drm_fd, &create); - * - * // We have now created a GEM context with two engines in the map: - * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines - * // will not be accessible from this context. - * - * ... - * execbuf.rsvd1 = ctx_id; - * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context - * gem_execbuf(drm_fd, &execbuf); - * - * ... - * execbuf.rsvd1 = ctx_id; - * execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context - * gem_execbuf(drm_fd, &execbuf); - */ - -struct i915_context_param_engines { - __u64 extensions; /* linked chain of extension blocks, 0 terminates */ -#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ -#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ -#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ - struct i915_engine_class_instance engines[]; -} __attribute__((packed)); - -#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \ - __u64 extensions; \ - struct i915_engine_class_instance engines[N__]; \ -} __attribute__((packed)) name__ - -struct i915_gem_context_param_context_image { - /** @engine: Engine class & instance to be configured. */ - struct i915_engine_class_instance engine; - - /** @flags: One of the supported flags or zero. */ - __u32 flags; -#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) - - /** @size: Size of the image blob pointed to by @image. */ - __u32 size; - - /** @mbz: Must be zero. */ - __u32 mbz; - - /** @image: Userspace memory containing the context image. */ - __u64 image; -} __attribute__((packed)); - -/** - * struct drm_i915_gem_context_create_ext_setparam - Context parameter - * to set or query during context creation. - */ -struct drm_i915_gem_context_create_ext_setparam { - /** @base: Extension link. See struct i915_user_extension. */ - struct i915_user_extension base; - - /** - * @param: Context parameter to set or query. - * See struct drm_i915_gem_context_param. - */ - struct drm_i915_gem_context_param param; -}; - -struct drm_i915_gem_context_destroy { - __u32 ctx_id; - __u32 pad; -}; - -/** - * struct drm_i915_gem_vm_control - Structure to create or destroy VM. - * - * DRM_I915_GEM_VM_CREATE - - * - * Create a new virtual memory address space (ppGTT) for use within a context - * on the same file. Extensions can be provided to configure exactly how the - * address space is setup upon creation. - * - * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is - * returned in the outparam @id. - * - * An extension chain maybe provided, starting with @extensions, and terminated - * by the @next_extension being 0. Currently, no extensions are defined. - * - * DRM_I915_GEM_VM_DESTROY - - * - * Destroys a previously created VM id, specified in @vm_id. - * - * No extensions or flags are allowed currently, and so must be zero. - */ -struct drm_i915_gem_vm_control { - /** @extensions: Zero-terminated chain of extensions. */ - __u64 extensions; - - /** @flags: reserved for future usage, currently MBZ */ - __u32 flags; - - /** @vm_id: Id of the VM created or to be destroyed */ - __u32 vm_id; -}; - -struct drm_i915_reg_read { - /* - * Register offset. - * For 64bit wide registers where the upper 32bits don't immediately - * follow the lower 32bits, the offset of the lower 32bits must - * be specified - */ - __u64 offset; -#define I915_REG_READ_8B_WA (1ul << 0) - - __u64 val; /* Return value */ -}; - -/* Known registers: - * - * Render engine timestamp - 0x2358 + 64bit - gen7+ - * - Note this register returns an invalid value if using the default - * single instruction 8byte read, in order to workaround that pass - * flag I915_REG_READ_8B_WA in offset field. - * - */ - -/* - * struct drm_i915_reset_stats - Return global reset and other context stats - * - * Driver keeps few stats for each contexts and also global reset count. - * This struct can be used to query those stats. - */ -struct drm_i915_reset_stats { - /** @ctx_id: ID of the requested context */ - __u32 ctx_id; - - /** @flags: MBZ */ - __u32 flags; - - /** @reset_count: All resets since boot/module reload, for all contexts */ - __u32 reset_count; - - /** @batch_active: Number of batches lost when active in GPU, for this context */ - __u32 batch_active; - - /** @batch_pending: Number of batches lost pending for execution, for this context */ - __u32 batch_pending; - - /** @pad: MBZ */ - __u32 pad; -}; - -/** - * struct drm_i915_gem_userptr - Create GEM object from user allocated memory. - * - * Userptr objects have several restrictions on what ioctls can be used with the - * object handle. - */ -struct drm_i915_gem_userptr { - /** - * @user_ptr: The pointer to the allocated memory. - * - * Needs to be aligned to PAGE_SIZE. - */ - __u64 user_ptr; - - /** - * @user_size: - * - * The size in bytes for the allocated memory. This will also become the - * object size. - * - * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE, - * or larger. - */ - __u64 user_size; - - /** - * @flags: - * - * Supported flags: - * - * I915_USERPTR_READ_ONLY: - * - * Mark the object as readonly, this also means GPU access can only be - * readonly. This is only supported on HW which supports readonly access - * through the GTT. If the HW can't support readonly access, an error is - * returned. - * - * I915_USERPTR_PROBE: - * - * Probe the provided @user_ptr range and validate that the @user_ptr is - * indeed pointing to normal memory and that the range is also valid. - * For example if some garbage address is given to the kernel, then this - * should complain. - * - * Returns -EFAULT if the probe failed. - * - * Note that this doesn't populate the backing pages, and also doesn't - * guarantee that the object will remain valid when the object is - * eventually used. - * - * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE - * returns a non-zero value. - * - * I915_USERPTR_UNSYNCHRONIZED: - * - * NOT USED. Setting this flag will result in an error. - */ - __u32 flags; -#define I915_USERPTR_READ_ONLY 0x1 -#define I915_USERPTR_PROBE 0x2 -#define I915_USERPTR_UNSYNCHRONIZED 0x80000000 - /** - * @handle: Returned handle for the object. - * - * Object handles are nonzero. - */ - __u32 handle; -}; - -enum drm_i915_oa_format { - I915_OA_FORMAT_A13 = 1, /* HSW only */ - I915_OA_FORMAT_A29, /* HSW only */ - I915_OA_FORMAT_A13_B8_C8, /* HSW only */ - I915_OA_FORMAT_B4_C8, /* HSW only */ - I915_OA_FORMAT_A45_B8_C8, /* HSW only */ - I915_OA_FORMAT_B4_C8_A16, /* HSW only */ - I915_OA_FORMAT_C4_B8, /* HSW+ */ - - /* Gen8+ */ - I915_OA_FORMAT_A12, - I915_OA_FORMAT_A12_B8_C8, - I915_OA_FORMAT_A32u40_A4u32_B8_C8, - - /* DG2 */ - I915_OAR_FORMAT_A32u40_A4u32_B8_C8, - I915_OA_FORMAT_A24u40_A14u32_B8_C8, - - /* MTL OAM */ - I915_OAM_FORMAT_MPEC8u64_B8_C8, - I915_OAM_FORMAT_MPEC8u32_B8_C8, - - I915_OA_FORMAT_MAX /* non-ABI */ -}; - -enum drm_i915_perf_property_id { - /** - * Open the stream for a specific context handle (as used with - * execbuffer2). A stream opened for a specific context this way - * won't typically require root privileges. - * - * This property is available in perf revision 1. - */ - DRM_I915_PERF_PROP_CTX_HANDLE = 1, - - /** - * A value of 1 requests the inclusion of raw OA unit reports as - * part of stream samples. - * - * This property is available in perf revision 1. - */ - DRM_I915_PERF_PROP_SAMPLE_OA, - - /** - * The value specifies which set of OA unit metrics should be - * configured, defining the contents of any OA unit reports. - * - * This property is available in perf revision 1. - */ - DRM_I915_PERF_PROP_OA_METRICS_SET, - - /** - * The value specifies the size and layout of OA unit reports. - * - * This property is available in perf revision 1. - */ - DRM_I915_PERF_PROP_OA_FORMAT, - - /** - * Specifying this property implicitly requests periodic OA unit - * sampling and (at least on Haswell) the sampling frequency is derived - * from this exponent as follows: - * - * 80ns * 2^(period_exponent + 1) - * - * This property is available in perf revision 1. - */ - DRM_I915_PERF_PROP_OA_EXPONENT, - - /** - * Specifying this property is only valid when specify a context to - * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property - * will hold preemption of the particular context we want to gather - * performance data about. The execbuf2 submissions must include a - * drm_i915_gem_execbuffer_ext_perf parameter for this to apply. - * - * This property is available in perf revision 3. - */ - DRM_I915_PERF_PROP_HOLD_PREEMPTION, - - /** - * Specifying this pins all contexts to the specified SSEU power - * configuration for the duration of the recording. - * - * This parameter's value is a pointer to a struct - * drm_i915_gem_context_param_sseu. - * - * This property is available in perf revision 4. - */ - DRM_I915_PERF_PROP_GLOBAL_SSEU, - - /** - * This optional parameter specifies the timer interval in nanoseconds - * at which the i915 driver will check the OA buffer for available data. - * Minimum allowed value is 100 microseconds. A default value is used by - * the driver if this parameter is not specified. Note that larger timer - * values will reduce cpu consumption during OA perf captures. However, - * excessively large values would potentially result in OA buffer - * overwrites as captures reach end of the OA buffer. - * - * This property is available in perf revision 5. - */ - DRM_I915_PERF_PROP_POLL_OA_PERIOD, - - /** - * Multiple engines may be mapped to the same OA unit. The OA unit is - * identified by class:instance of any engine mapped to it. - * - * This parameter specifies the engine class and must be passed along - * with DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE. - * - * This property is available in perf revision 6. - */ - DRM_I915_PERF_PROP_OA_ENGINE_CLASS, - - /** - * This parameter specifies the engine instance and must be passed along - * with DRM_I915_PERF_PROP_OA_ENGINE_CLASS. - * - * This property is available in perf revision 6. - */ - DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE, - - DRM_I915_PERF_PROP_MAX /* non-ABI */ -}; - -struct drm_i915_perf_open_param { - __u32 flags; -#define I915_PERF_FLAG_FD_CLOEXEC (1<<0) -#define I915_PERF_FLAG_FD_NONBLOCK (1<<1) -#define I915_PERF_FLAG_DISABLED (1<<2) - - /** The number of u64 (id, value) pairs */ - __u32 num_properties; - - /** - * Pointer to array of u64 (id, value) pairs configuring the stream - * to open. - */ - __u64 properties_ptr; -}; - -/* - * Enable data capture for a stream that was either opened in a disabled state - * via I915_PERF_FLAG_DISABLED or was later disabled via - * I915_PERF_IOCTL_DISABLE. - * - * It is intended to be cheaper to disable and enable a stream than it may be - * to close and re-open a stream with the same configuration. - * - * It's undefined whether any pending data for the stream will be lost. - * - * This ioctl is available in perf revision 1. - */ -#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) - -/* - * Disable data capture for a stream. - * - * It is an error to try and read a stream that is disabled. - * - * This ioctl is available in perf revision 1. - */ -#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) - -/* - * Change metrics_set captured by a stream. - * - * If the stream is bound to a specific context, the configuration change - * will performed inline with that context such that it takes effect before - * the next execbuf submission. - * - * Returns the previously bound metrics set id, or a negative error code. - * - * This ioctl is available in perf revision 2. - */ -#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2) - -/* - * Common to all i915 perf records - */ -struct drm_i915_perf_record_header { - __u32 type; - __u16 pad; - __u16 size; -}; - -enum drm_i915_perf_record_type { - - /** - * Samples are the work horse record type whose contents are extensible - * and defined when opening an i915 perf stream based on the given - * properties. - * - * Boolean properties following the naming convention - * DRM_I915_PERF_SAMPLE_xyz_PROP request the inclusion of 'xyz' data in - * every sample. - * - * The order of these sample properties given by userspace has no - * affect on the ordering of data within a sample. The order is - * documented here. - * - * struct { - * struct drm_i915_perf_record_header header; - * - * { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA - * }; - */ - DRM_I915_PERF_RECORD_SAMPLE = 1, - - /* - * Indicates that one or more OA reports were not written by the - * hardware. This can happen for example if an MI_REPORT_PERF_COUNT - * command collides with periodic sampling - which would be more likely - * at higher sampling frequencies. - */ - DRM_I915_PERF_RECORD_OA_REPORT_LOST = 2, - - /** - * An error occurred that resulted in all pending OA reports being lost. - */ - DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3, - - DRM_I915_PERF_RECORD_MAX /* non-ABI */ -}; - -/** - * struct drm_i915_perf_oa_config - * - * Structure to upload perf dynamic configuration into the kernel. - */ -struct drm_i915_perf_oa_config { - /** - * @uuid: - * - * String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" - */ - char uuid[36]; - - /** - * @n_mux_regs: - * - * Number of mux regs in &mux_regs_ptr. - */ - __u32 n_mux_regs; - - /** - * @n_boolean_regs: - * - * Number of boolean regs in &boolean_regs_ptr. - */ - __u32 n_boolean_regs; - - /** - * @n_flex_regs: - * - * Number of flex regs in &flex_regs_ptr. - */ - __u32 n_flex_regs; - - /** - * @mux_regs_ptr: - * - * Pointer to tuples of u32 values (register address, value) for mux - * registers. Expected length of buffer is (2 * sizeof(u32) * - * &n_mux_regs). - */ - __u64 mux_regs_ptr; - - /** - * @boolean_regs_ptr: - * - * Pointer to tuples of u32 values (register address, value) for mux - * registers. Expected length of buffer is (2 * sizeof(u32) * - * &n_boolean_regs). - */ - __u64 boolean_regs_ptr; - - /** - * @flex_regs_ptr: - * - * Pointer to tuples of u32 values (register address, value) for mux - * registers. Expected length of buffer is (2 * sizeof(u32) * - * &n_flex_regs). - */ - __u64 flex_regs_ptr; -}; - -/** - * struct drm_i915_query_item - An individual query for the kernel to process. - * - * The behaviour is determined by the @query_id. Note that exactly what - * @data_ptr is also depends on the specific @query_id. - */ -struct drm_i915_query_item { - /** - * @query_id: - * - * The id for this query. Currently accepted query IDs are: - * - %DRM_I915_QUERY_TOPOLOGY_INFO (see struct drm_i915_query_topology_info) - * - %DRM_I915_QUERY_ENGINE_INFO (see struct drm_i915_engine_info) - * - %DRM_I915_QUERY_PERF_CONFIG (see struct drm_i915_query_perf_config) - * - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions) - * - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`) - * - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info) - * - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct drm_i915_query_guc_submission_version) - */ - __u64 query_id; -#define DRM_I915_QUERY_TOPOLOGY_INFO 1 -#define DRM_I915_QUERY_ENGINE_INFO 2 -#define DRM_I915_QUERY_PERF_CONFIG 3 -#define DRM_I915_QUERY_MEMORY_REGIONS 4 -#define DRM_I915_QUERY_HWCONFIG_BLOB 5 -#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6 -#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION 7 -/* Must be kept compact -- no holes and well documented */ - - /** - * @length: - * - * When set to zero by userspace, this is filled with the size of the - * data to be written at the @data_ptr pointer. The kernel sets this - * value to a negative value to signal an error on a particular query - * item. - */ - __s32 length; - - /** - * @flags: - * - * When &query_id == %DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. - * - * When &query_id == %DRM_I915_QUERY_PERF_CONFIG, must be one of the - * following: - * - * - %DRM_I915_QUERY_PERF_CONFIG_LIST - * - %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID - * - %DRM_I915_QUERY_PERF_CONFIG_FOR_UUID - * - * When &query_id == %DRM_I915_QUERY_GEOMETRY_SUBSLICES must contain - * a struct i915_engine_class_instance that references a render engine. - */ - __u32 flags; -#define DRM_I915_QUERY_PERF_CONFIG_LIST 1 -#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2 -#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3 - - /** - * @data_ptr: - * - * Data will be written at the location pointed by @data_ptr when the - * value of @length matches the length of the data to be written by the - * kernel. - */ - __u64 data_ptr; -}; - -/** - * struct drm_i915_query - Supply an array of struct drm_i915_query_item for the - * kernel to fill out. - * - * Note that this is generally a two step process for each struct - * drm_i915_query_item in the array: - * - * 1. Call the DRM_IOCTL_I915_QUERY, giving it our array of struct - * drm_i915_query_item, with &drm_i915_query_item.length set to zero. The - * kernel will then fill in the size, in bytes, which tells userspace how - * memory it needs to allocate for the blob(say for an array of properties). - * - * 2. Next we call DRM_IOCTL_I915_QUERY again, this time with the - * &drm_i915_query_item.data_ptr equal to our newly allocated blob. Note that - * the &drm_i915_query_item.length should still be the same as what the - * kernel previously set. At this point the kernel can fill in the blob. - * - * Note that for some query items it can make sense for userspace to just pass - * in a buffer/blob equal to or larger than the required size. In this case only - * a single ioctl call is needed. For some smaller query items this can work - * quite well. - * - */ -struct drm_i915_query { - /** @num_items: The number of elements in the @items_ptr array */ - __u32 num_items; - - /** - * @flags: Unused for now. Must be cleared to zero. - */ - __u32 flags; - - /** - * @items_ptr: - * - * Pointer to an array of struct drm_i915_query_item. The number of - * array elements is @num_items. - */ - __u64 items_ptr; -}; - -/** - * struct drm_i915_query_topology_info - * - * Describes slice/subslice/EU information queried by - * %DRM_I915_QUERY_TOPOLOGY_INFO - */ -struct drm_i915_query_topology_info { - /** - * @flags: - * - * Unused for now. Must be cleared to zero. - */ - __u16 flags; - - /** - * @max_slices: - * - * The number of bits used to express the slice mask. - */ - __u16 max_slices; - - /** - * @max_subslices: - * - * The number of bits used to express the subslice mask. - */ - __u16 max_subslices; - - /** - * @max_eus_per_subslice: - * - * The number of bits in the EU mask that correspond to a single - * subslice's EUs. - */ - __u16 max_eus_per_subslice; - - /** - * @subslice_offset: - * - * Offset in data[] at which the subslice masks are stored. - */ - __u16 subslice_offset; - - /** - * @subslice_stride: - * - * Stride at which each of the subslice masks for each slice are - * stored. - */ - __u16 subslice_stride; - - /** - * @eu_offset: - * - * Offset in data[] at which the EU masks are stored. - */ - __u16 eu_offset; - - /** - * @eu_stride: - * - * Stride at which each of the EU masks for each subslice are stored. - */ - __u16 eu_stride; - - /** - * @data: - * - * Contains 3 pieces of information : - * - * - The slice mask with one bit per slice telling whether a slice is - * available. The availability of slice X can be queried with the - * following formula : - * - * .. code:: c - * - * (data[X / 8] >> (X % 8)) & 1 - * - * Starting with Xe_HP platforms, Intel hardware no longer has - * traditional slices so i915 will always report a single slice - * (hardcoded slicemask = 0x1) which contains all of the platform's - * subslices. I.e., the mask here does not reflect any of the newer - * hardware concepts such as "gslices" or "cslices" since userspace - * is capable of inferring those from the subslice mask. - * - * - The subslice mask for each slice with one bit per subslice telling - * whether a subslice is available. Starting with Gen12 we use the - * term "subslice" to refer to what the hardware documentation - * describes as a "dual-subslices." The availability of subslice Y - * in slice X can be queried with the following formula : - * - * .. code:: c - * - * (data[subslice_offset + X * subslice_stride + Y / 8] >> (Y % 8)) & 1 - * - * - The EU mask for each subslice in each slice, with one bit per EU - * telling whether an EU is available. The availability of EU Z in - * subslice Y in slice X can be queried with the following formula : - * - * .. code:: c - * - * (data[eu_offset + - * (X * max_subslices + Y) * eu_stride + - * Z / 8 - * ] >> (Z % 8)) & 1 - */ - __u8 data[]; -}; - -/** - * DOC: Engine Discovery uAPI - * - * Engine discovery uAPI is a way of enumerating physical engines present in a - * GPU associated with an open i915 DRM file descriptor. This supersedes the old - * way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like - * `I915_PARAM_HAS_BLT`. - * - * The need for this interface came starting with Icelake and newer GPUs, which - * started to establish a pattern of having multiple engines of a same class, - * where not all instances were always completely functionally equivalent. - * - * Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the - * `DRM_I915_QUERY_ENGINE_INFO` as the queried item id. - * - * Example for getting the list of engines: - * - * .. code-block:: C - * - * struct drm_i915_query_engine_info *info; - * struct drm_i915_query_item item = { - * .query_id = DRM_I915_QUERY_ENGINE_INFO; - * }; - * struct drm_i915_query query = { - * .num_items = 1, - * .items_ptr = (uintptr_t)&item, - * }; - * int err, i; - * - * // First query the size of the blob we need, this needs to be large - * // enough to hold our array of engines. The kernel will fill out the - * // item.length for us, which is the number of bytes we need. - * // - * // Alternatively a large buffer can be allocated straightaway enabling - * // querying in one pass, in which case item.length should contain the - * // length of the provided buffer. - * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); - * if (err) ... - * - * info = calloc(1, item.length); - * // Now that we allocated the required number of bytes, we call the ioctl - * // again, this time with the data_ptr pointing to our newly allocated - * // blob, which the kernel can then populate with info on all engines. - * item.data_ptr = (uintptr_t)&info; - * - * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); - * if (err) ... - * - * // We can now access each engine in the array - * for (i = 0; i < info->num_engines; i++) { - * struct drm_i915_engine_info einfo = info->engines[i]; - * u16 class = einfo.engine.class; - * u16 instance = einfo.engine.instance; - * .... - * } - * - * free(info); - * - * Each of the enumerated engines, apart from being defined by its class and - * instance (see `struct i915_engine_class_instance`), also can have flags and - * capabilities defined as documented in i915_drm.h. - * - * For instance video engines which support HEVC encoding will have the - * `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set. - * - * Engine discovery only fully comes to its own when combined with the new way - * of addressing engines when submitting batch buffers using contexts with - * engine maps configured. - */ - -/** - * struct drm_i915_engine_info - * - * Describes one engine and its capabilities as known to the driver. - */ -struct drm_i915_engine_info { - /** @engine: Engine class and instance. */ - struct i915_engine_class_instance engine; - - /** @rsvd0: Reserved field. */ - __u32 rsvd0; - - /** @flags: Engine flags. */ - __u64 flags; -#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0) - - /** @capabilities: Capabilities of this engine. */ - __u64 capabilities; -#define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0) -#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1) - - /** @logical_instance: Logical instance of engine */ - __u16 logical_instance; - - /** @rsvd1: Reserved fields. */ - __u16 rsvd1[3]; - /** @rsvd2: Reserved fields. */ - __u64 rsvd2[3]; -}; - -/** - * struct drm_i915_query_engine_info - * - * Engine info query enumerates all engines known to the driver by filling in - * an array of struct drm_i915_engine_info structures. - */ -struct drm_i915_query_engine_info { - /** @num_engines: Number of struct drm_i915_engine_info structs following. */ - __u32 num_engines; - - /** @rsvd: MBZ */ - __u32 rsvd[3]; - - /** @engines: Marker for drm_i915_engine_info structures. */ - struct drm_i915_engine_info engines[]; -}; - -/** - * struct drm_i915_query_perf_config - * - * Data written by the kernel with query %DRM_I915_QUERY_PERF_CONFIG and - * %DRM_I915_QUERY_GEOMETRY_SUBSLICES. - */ -struct drm_i915_query_perf_config { - union { - /** - * @n_configs: - * - * When &drm_i915_query_item.flags == - * %DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets this fields to - * the number of configurations available. - */ - __u64 n_configs; - - /** - * @config: - * - * When &drm_i915_query_item.flags == - * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, i915 will use the - * value in this field as configuration identifier to decide - * what data to write into config_ptr. - */ - __u64 config; - - /** - * @uuid: - * - * When &drm_i915_query_item.flags == - * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, i915 will use the - * value in this field as configuration identifier to decide - * what data to write into config_ptr. - * - * String formatted like "%08x-%04x-%04x-%04x-%012x" - */ - char uuid[36]; - }; - - /** - * @flags: - * - * Unused for now. Must be cleared to zero. - */ - __u32 flags; - - /** - * @data: - * - * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_LIST, - * i915 will write an array of __u64 of configuration identifiers. - * - * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_DATA, - * i915 will write a struct drm_i915_perf_oa_config. If the following - * fields of struct drm_i915_perf_oa_config are not set to 0, i915 will - * write into the associated pointers the values of submitted when the - * configuration was created : - * - * - &drm_i915_perf_oa_config.n_mux_regs - * - &drm_i915_perf_oa_config.n_boolean_regs - * - &drm_i915_perf_oa_config.n_flex_regs - */ - __u8 data[]; -}; - -/** - * enum drm_i915_gem_memory_class - Supported memory classes - */ -enum drm_i915_gem_memory_class { - /** @I915_MEMORY_CLASS_SYSTEM: System memory */ - I915_MEMORY_CLASS_SYSTEM = 0, - /** @I915_MEMORY_CLASS_DEVICE: Device local-memory */ - I915_MEMORY_CLASS_DEVICE, -}; - -/** - * struct drm_i915_gem_memory_class_instance - Identify particular memory region - */ -struct drm_i915_gem_memory_class_instance { - /** @memory_class: See enum drm_i915_gem_memory_class */ - __u16 memory_class; - - /** @memory_instance: Which instance */ - __u16 memory_instance; -}; - -/** - * struct drm_i915_memory_region_info - Describes one region as known to the - * driver. - * - * Note this is using both struct drm_i915_query_item and struct drm_i915_query. - * For this new query we are adding the new query id DRM_I915_QUERY_MEMORY_REGIONS - * at &drm_i915_query_item.query_id. - */ -struct drm_i915_memory_region_info { - /** @region: The class:instance pair encoding */ - struct drm_i915_gem_memory_class_instance region; - - /** @rsvd0: MBZ */ - __u32 rsvd0; - - /** - * @probed_size: Memory probed by the driver - * - * Note that it should not be possible to ever encounter a zero value - * here, also note that no current region type will ever return -1 here. - * Although for future region types, this might be a possibility. The - * same applies to the other size fields. - */ - __u64 probed_size; - - /** - * @unallocated_size: Estimate of memory remaining - * - * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable accounting. - * Without this (or if this is an older kernel) the value here will - * always equal the @probed_size. Note this is only currently tracked - * for I915_MEMORY_CLASS_DEVICE regions (for other types the value here - * will always equal the @probed_size). - */ - __u64 unallocated_size; - - union { - /** @rsvd1: MBZ */ - __u64 rsvd1[8]; - struct { - /** - * @probed_cpu_visible_size: Memory probed by the driver - * that is CPU accessible. - * - * This will be always be <= @probed_size, and the - * remainder (if there is any) will not be CPU - * accessible. - * - * On systems without small BAR, the @probed_size will - * always equal the @probed_cpu_visible_size, since all - * of it will be CPU accessible. - * - * Note this is only tracked for - * I915_MEMORY_CLASS_DEVICE regions (for other types the - * value here will always equal the @probed_size). - * - * Note that if the value returned here is zero, then - * this must be an old kernel which lacks the relevant - * small-bar uAPI support (including - * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS), but on - * such systems we should never actually end up with a - * small BAR configuration, assuming we are able to load - * the kernel module. Hence it should be safe to treat - * this the same as when @probed_cpu_visible_size == - * @probed_size. - */ - __u64 probed_cpu_visible_size; - - /** - * @unallocated_cpu_visible_size: Estimate of CPU - * visible memory remaining. - * - * Note this is only tracked for - * I915_MEMORY_CLASS_DEVICE regions (for other types the - * value here will always equal the - * @probed_cpu_visible_size). - * - * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable - * accounting. Without this the value here will always - * equal the @probed_cpu_visible_size. Note this is only - * currently tracked for I915_MEMORY_CLASS_DEVICE - * regions (for other types the value here will also - * always equal the @probed_cpu_visible_size). - * - * If this is an older kernel the value here will be - * zero, see also @probed_cpu_visible_size. - */ - __u64 unallocated_cpu_visible_size; - }; - }; -}; - -/** - * struct drm_i915_query_memory_regions - * - * The region info query enumerates all regions known to the driver by filling - * in an array of struct drm_i915_memory_region_info structures. - * - * Example for getting the list of supported regions: - * - * .. code-block:: C - * - * struct drm_i915_query_memory_regions *info; - * struct drm_i915_query_item item = { - * .query_id = DRM_I915_QUERY_MEMORY_REGIONS; - * }; - * struct drm_i915_query query = { - * .num_items = 1, - * .items_ptr = (uintptr_t)&item, - * }; - * int err, i; - * - * // First query the size of the blob we need, this needs to be large - * // enough to hold our array of regions. The kernel will fill out the - * // item.length for us, which is the number of bytes we need. - * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); - * if (err) ... - * - * info = calloc(1, item.length); - * // Now that we allocated the required number of bytes, we call the ioctl - * // again, this time with the data_ptr pointing to our newly allocated - * // blob, which the kernel can then populate with the all the region info. - * item.data_ptr = (uintptr_t)&info, - * - * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); - * if (err) ... - * - * // We can now access each region in the array - * for (i = 0; i < info->num_regions; i++) { - * struct drm_i915_memory_region_info mr = info->regions[i]; - * u16 class = mr.region.class; - * u16 instance = mr.region.instance; - * - * .... - * } - * - * free(info); - */ -struct drm_i915_query_memory_regions { - /** @num_regions: Number of supported regions */ - __u32 num_regions; - - /** @rsvd: MBZ */ - __u32 rsvd[3]; - - /** @regions: Info about each supported region */ - struct drm_i915_memory_region_info regions[]; -}; - -/** - * struct drm_i915_query_guc_submission_version - query GuC submission interface version - */ -struct drm_i915_query_guc_submission_version { - /** @branch: Firmware branch version. */ - __u32 branch; - /** @major: Firmware major version. */ - __u32 major; - /** @minor: Firmware minor version. */ - __u32 minor; - /** @patch: Firmware patch version. */ - __u32 patch; -}; - -/** - * DOC: GuC HWCONFIG blob uAPI - * - * The GuC produces a blob with information about the current device. - * i915 reads this blob from GuC and makes it available via this uAPI. - * - * The format and meaning of the blob content are documented in the - * Programmer's Reference Manual. - */ - -/** - * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added - * extension support using struct i915_user_extension. - * - * Note that new buffer flags should be added here, at least for the stuff that - * is immutable. Previously we would have two ioctls, one to create the object - * with gem_create, and another to apply various parameters, however this - * creates some ambiguity for the params which are considered immutable. Also in - * general we're phasing out the various SET/GET ioctls. - */ -struct drm_i915_gem_create_ext { - /** - * @size: Requested size for the object. - * - * The (page-aligned) allocated size for the object will be returned. - * - * On platforms like DG2/ATS the kernel will always use 64K or larger - * pages for I915_MEMORY_CLASS_DEVICE. The kernel also requires a - * minimum of 64K GTT alignment for such objects. - * - * NOTE: Previously the ABI here required a minimum GTT alignment of 2M - * on DG2/ATS, due to how the hardware implemented 64K GTT page support, - * where we had the following complications: - * - * 1) The entire PDE (which covers a 2MB virtual address range), must - * contain only 64K PTEs, i.e mixing 4K and 64K PTEs in the same - * PDE is forbidden by the hardware. - * - * 2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM - * objects. - * - * However on actual production HW this was completely changed to now - * allow setting a TLB hint at the PTE level (see PS64), which is a lot - * more flexible than the above. With this the 2M restriction was - * dropped where we now only require 64K. - */ - __u64 size; - - /** - * @handle: Returned handle for the object. - * - * Object handles are nonzero. - */ - __u32 handle; - - /** - * @flags: Optional flags. - * - * Supported values: - * - * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that - * the object will need to be accessed via the CPU. - * - * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only - * strictly required on configurations where some subset of the device - * memory is directly visible/mappable through the CPU (which we also - * call small BAR), like on some DG2+ systems. Note that this is quite - * undesirable, but due to various factors like the client CPU, BIOS etc - * it's something we can expect to see in the wild. See - * &drm_i915_memory_region_info.probed_cpu_visible_size for how to - * determine if this system applies. - * - * Note that one of the placements MUST be I915_MEMORY_CLASS_SYSTEM, to - * ensure the kernel can always spill the allocation to system memory, - * if the object can't be allocated in the mappable part of - * I915_MEMORY_CLASS_DEVICE. - * - * Also note that since the kernel only supports flat-CCS on objects - * that can *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore - * don't support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with - * flat-CCS. - * - * Without this hint, the kernel will assume that non-mappable - * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the - * kernel can still migrate the object to the mappable part, as a last - * resort, if userspace ever CPU faults this object, but this might be - * expensive, and so ideally should be avoided. - * - * On older kernels which lack the relevant small-bar uAPI support (see - * also &drm_i915_memory_region_info.probed_cpu_visible_size), - * usage of the flag will result in an error, but it should NEVER be - * possible to end up with a small BAR configuration, assuming we can - * also successfully load the i915 kernel module. In such cases the - * entire I915_MEMORY_CLASS_DEVICE region will be CPU accessible, and as - * such there are zero restrictions on where the object can be placed. - */ -#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0) - __u32 flags; - - /** - * @extensions: The chain of extensions to apply to this object. - * - * This will be useful in the future when we need to support several - * different extensions, and we need to apply more than one when - * creating the object. See struct i915_user_extension. - * - * If we don't supply any extensions then we get the same old gem_create - * behaviour. - * - * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see - * struct drm_i915_gem_create_ext_memory_regions. - * - * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see - * struct drm_i915_gem_create_ext_protected_content. - * - * For I915_GEM_CREATE_EXT_SET_PAT usage see - * struct drm_i915_gem_create_ext_set_pat. - */ -#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0 -#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1 -#define I915_GEM_CREATE_EXT_SET_PAT 2 - __u64 extensions; -}; - -/** - * struct drm_i915_gem_create_ext_memory_regions - The - * I915_GEM_CREATE_EXT_MEMORY_REGIONS extension. - * - * Set the object with the desired set of placements/regions in priority - * order. Each entry must be unique and supported by the device. - * - * This is provided as an array of struct drm_i915_gem_memory_class_instance, or - * an equivalent layout of class:instance pair encodings. See struct - * drm_i915_query_memory_regions and DRM_I915_QUERY_MEMORY_REGIONS for how to - * query the supported regions for a device. - * - * As an example, on discrete devices, if we wish to set the placement as - * device local-memory we can do something like: - * - * .. code-block:: C - * - * struct drm_i915_gem_memory_class_instance region_lmem = { - * .memory_class = I915_MEMORY_CLASS_DEVICE, - * .memory_instance = 0, - * }; - * struct drm_i915_gem_create_ext_memory_regions regions = { - * .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS }, - * .regions = (uintptr_t)®ion_lmem, - * .num_regions = 1, - * }; - * struct drm_i915_gem_create_ext create_ext = { - * .size = 16 * PAGE_SIZE, - * .extensions = (uintptr_t)®ions, - * }; - * - * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); - * if (err) ... - * - * At which point we get the object handle in &drm_i915_gem_create_ext.handle, - * along with the final object size in &drm_i915_gem_create_ext.size, which - * should account for any rounding up, if required. - * - * Note that userspace has no means of knowing the current backing region - * for objects where @num_regions is larger than one. The kernel will only - * ensure that the priority order of the @regions array is honoured, either - * when initially placing the object, or when moving memory around due to - * memory pressure - * - * On Flat-CCS capable HW, compression is supported for the objects residing - * in I915_MEMORY_CLASS_DEVICE. When such objects (compressed) have other - * memory class in @regions and migrated (by i915, due to memory - * constraints) to the non I915_MEMORY_CLASS_DEVICE region, then i915 needs to - * decompress the content. But i915 doesn't have the required information to - * decompress the userspace compressed objects. - * - * So i915 supports Flat-CCS, on the objects which can reside only on - * I915_MEMORY_CLASS_DEVICE regions. - */ -struct drm_i915_gem_create_ext_memory_regions { - /** @base: Extension link. See struct i915_user_extension. */ - struct i915_user_extension base; - - /** @pad: MBZ */ - __u32 pad; - /** @num_regions: Number of elements in the @regions array. */ - __u32 num_regions; - /** - * @regions: The regions/placements array. - * - * An array of struct drm_i915_gem_memory_class_instance. - */ - __u64 regions; -}; - -/** - * struct drm_i915_gem_create_ext_protected_content - The - * I915_OBJECT_PARAM_PROTECTED_CONTENT extension. - * - * If this extension is provided, buffer contents are expected to be protected - * by PXP encryption and require decryption for scan out and processing. This - * is only possible on platforms that have PXP enabled, on all other scenarios - * using this extension will cause the ioctl to fail and return -ENODEV. The - * flags parameter is reserved for future expansion and must currently be set - * to zero. - * - * The buffer contents are considered invalid after a PXP session teardown. - * - * The encryption is guaranteed to be processed correctly only if the object - * is submitted with a context created using the - * I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. This will also enable extra checks - * at submission time on the validity of the objects involved. - * - * Below is an example on how to create a protected object: - * - * .. code-block:: C - * - * struct drm_i915_gem_create_ext_protected_content protected_ext = { - * .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT }, - * .flags = 0, - * }; - * struct drm_i915_gem_create_ext create_ext = { - * .size = PAGE_SIZE, - * .extensions = (uintptr_t)&protected_ext, - * }; - * - * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); - * if (err) ... - */ -struct drm_i915_gem_create_ext_protected_content { - /** @base: Extension link. See struct i915_user_extension. */ - struct i915_user_extension base; - /** @flags: reserved for future usage, currently MBZ */ - __u32 flags; -}; - -/** - * struct drm_i915_gem_create_ext_set_pat - The - * I915_GEM_CREATE_EXT_SET_PAT extension. - * - * If this extension is provided, the specified caching policy (PAT index) is - * applied to the buffer object. - * - * Below is an example on how to create an object with specific caching policy: - * - * .. code-block:: C - * - * struct drm_i915_gem_create_ext_set_pat set_pat_ext = { - * .base = { .name = I915_GEM_CREATE_EXT_SET_PAT }, - * .pat_index = 0, - * }; - * struct drm_i915_gem_create_ext create_ext = { - * .size = PAGE_SIZE, - * .extensions = (uintptr_t)&set_pat_ext, - * }; - * - * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); - * if (err) ... - */ -struct drm_i915_gem_create_ext_set_pat { - /** @base: Extension link. See struct i915_user_extension. */ - struct i915_user_extension base; - /** - * @pat_index: PAT index to be set - * PAT index is a bit field in Page Table Entry to control caching - * behaviors for GPU accesses. The definition of PAT index is - * platform dependent and can be found in hardware specifications, - */ - __u32 pat_index; - /** @rsvd: reserved for future use */ - __u32 rsvd; -}; - -/* ID of the protected content session managed by i915 when PXP is active */ -#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf - -#if defined(__cplusplus) -} -#endif - -#endif /* _UAPI_I915_DRM_H_ */ diff --git a/tools/include/uapi/linux/acct.h b/tools/include/uapi/linux/acct.h new file mode 100644 index 000000000000..1e2382ed4fd5 --- /dev/null +++ b/tools/include/uapi/linux/acct.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * BSD Process Accounting for Linux - Definitions + * + * Author: Marco van Wieringen (mvw@planets.elm.net) + * + * This header file contains the definitions needed to implement + * BSD-style process accounting. The kernel accounting code and all + * user-level programs that try to do something useful with the + * process accounting log must include this file. + * + * Copyright (C) 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V. + * + */ + +#ifndef _UAPI_LINUX_ACCT_H +#define _UAPI_LINUX_ACCT_H + +#include <linux/types.h> + +#include <asm/param.h> +#include <asm/byteorder.h> + +/* + * comp_t is a 16-bit "floating" point number with a 3-bit base 8 + * exponent and a 13-bit fraction. + * comp2_t is 24-bit with 5-bit base 2 exponent and 20 bit fraction + * (leading 1 not stored). + * See linux/kernel/acct.c for the specific encoding systems used. + */ + +typedef __u16 comp_t; +typedef __u32 comp2_t; + +/* + * accounting file record + * + * This structure contains all of the information written out to the + * process accounting file whenever a process exits. + */ + +#define ACCT_COMM 16 + +struct acct +{ + char ac_flag; /* Flags */ + char ac_version; /* Always set to ACCT_VERSION */ + /* for binary compatibility back until 2.0 */ + __u16 ac_uid16; /* LSB of Real User ID */ + __u16 ac_gid16; /* LSB of Real Group ID */ + __u16 ac_tty; /* Control Terminal */ + /* __u32 range means times from 1970 to 2106 */ + __u32 ac_btime; /* Process Creation Time */ + comp_t ac_utime; /* User Time */ + comp_t ac_stime; /* System Time */ + comp_t ac_etime; /* Elapsed Time */ + comp_t ac_mem; /* Average Memory Usage */ + comp_t ac_io; /* Chars Transferred */ + comp_t ac_rw; /* Blocks Read or Written */ + comp_t ac_minflt; /* Minor Pagefaults */ + comp_t ac_majflt; /* Major Pagefaults */ + comp_t ac_swaps; /* Number of Swaps */ +/* m68k had no padding here. */ +#if !defined(CONFIG_M68K) || !defined(__KERNEL__) + __u16 ac_ahz; /* AHZ */ +#endif + __u32 ac_exitcode; /* Exitcode */ + char ac_comm[ACCT_COMM + 1]; /* Command Name */ + __u8 ac_etime_hi; /* Elapsed Time MSB */ + __u16 ac_etime_lo; /* Elapsed Time LSB */ + __u32 ac_uid; /* Real User ID */ + __u32 ac_gid; /* Real Group ID */ +}; + +struct acct_v3 +{ + char ac_flag; /* Flags */ + char ac_version; /* Always set to ACCT_VERSION */ + __u16 ac_tty; /* Control Terminal */ + __u32 ac_exitcode; /* Exitcode */ + __u32 ac_uid; /* Real User ID */ + __u32 ac_gid; /* Real Group ID */ + __u32 ac_pid; /* Process ID */ + __u32 ac_ppid; /* Parent Process ID */ + /* __u32 range means times from 1970 to 2106 */ + __u32 ac_btime; /* Process Creation Time */ +#ifdef __KERNEL__ + __u32 ac_etime; /* Elapsed Time */ +#else + float ac_etime; /* Elapsed Time */ +#endif + comp_t ac_utime; /* User Time */ + comp_t ac_stime; /* System Time */ + comp_t ac_mem; /* Average Memory Usage */ + comp_t ac_io; /* Chars Transferred */ + comp_t ac_rw; /* Blocks Read or Written */ + comp_t ac_minflt; /* Minor Pagefaults */ + comp_t ac_majflt; /* Major Pagefaults */ + comp_t ac_swaps; /* Number of Swaps */ + char ac_comm[ACCT_COMM]; /* Command Name */ +}; + +/* + * accounting flags + */ + /* bit set when the process/task ... */ +#define AFORK 0x01 /* ... executed fork, but did not exec */ +#define ASU 0x02 /* ... used super-user privileges */ +#define ACOMPAT 0x04 /* ... used compatibility mode (VAX only not used) */ +#define ACORE 0x08 /* ... dumped core */ +#define AXSIG 0x10 /* ... was killed by a signal */ +#define AGROUP 0x20 /* ... was the last task of the process (task group) */ + +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) +#define ACCT_BYTEORDER 0x80 /* accounting file is big endian */ +#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) +#define ACCT_BYTEORDER 0x00 /* accounting file is little endian */ +#else +#error unspecified endianness +#endif + +#ifndef __KERNEL__ +#define ACCT_VERSION 2 +#define AHZ (HZ) +#endif /* __KERNEL */ + + +#endif /* _UAPI_LINUX_ACCT_H */ diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 5ee30f882736..a04afef9efca 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -4,13 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) \ - (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ - (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) \ - (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ - (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2acf9b336371..89b36de5fdbb 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -51,6 +51,9 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +#define BPF_LOAD_ACQ 0x100 /* load-acquire */ +#define BPF_STORE_REL 0x110 /* store-release */ + enum bpf_cond_pseudo_jmp { BPF_MAY_GOTO = 0, }; @@ -116,6 +119,14 @@ enum bpf_cgroup_iter_order { BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */ BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */ + /* + * Walks the immediate children of the specified parent + * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE, + * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP + * the iterator does not include the specified parent as one of the + * returned iterator elements. + */ + BPF_CGROUP_ITER_CHILDREN, }; union bpf_iter_link_info { @@ -447,6 +458,7 @@ union bpf_iter_link_info { * * **struct bpf_map_info** * * **struct bpf_btf_info** * * **struct bpf_link_info** + * * **struct bpf_token_info** * * Return * Returns zero on success. On error, -1 is returned and *errno* @@ -903,6 +915,27 @@ union bpf_iter_link_info { * A new file descriptor (a nonnegative integer), or -1 if an * error occurred (in which case, *errno* is set appropriately). * + * BPF_PROG_STREAM_READ_BY_FD + * Description + * Read data of a program's BPF stream. The program is identified + * by *prog_fd*, and the stream is identified by the *stream_id*. + * The data is copied to a buffer pointed to by *stream_buf*, and + * filled less than or equal to *stream_buf_len* bytes. + * + * Return + * Number of bytes read from the stream on success, or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_PROG_ASSOC_STRUCT_OPS + * Description + * Associate a BPF program with a struct_ops map. The struct_ops + * map is identified by *map_fd* and the BPF program is + * identified by *prog_fd*. + * + * Return + * 0 on success or -1 if an error occurred (in which case, + * *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -958,7 +991,10 @@ enum bpf_cmd { BPF_LINK_DETACH, BPF_PROG_BIND_MAP, BPF_TOKEN_CREATE, + BPF_PROG_STREAM_READ_BY_FD, + BPF_PROG_ASSOC_STRUCT_OPS, __MAX_BPF_CMD, + BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying syscall common attrs. */ }; enum bpf_map_type { @@ -1010,6 +1046,8 @@ enum bpf_map_type { BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, BPF_MAP_TYPE_ARENA, + BPF_MAP_TYPE_INSN_ARRAY, + BPF_MAP_TYPE_RHASH, __MAX_BPF_MAP_TYPE }; @@ -1117,6 +1155,10 @@ enum bpf_attach_type { BPF_NETKIT_PEER, BPF_TRACE_KPROBE_SESSION, BPF_TRACE_UPROBE_SESSION, + BPF_TRACE_FSESSION, + BPF_TRACE_FENTRY_MULTI, + BPF_TRACE_FEXIT_MULTI, + BPF_TRACE_FSESSION_MULTI, __MAX_BPF_ATTACH_TYPE }; @@ -1141,6 +1183,7 @@ enum bpf_link_type { BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, BPF_LINK_TYPE_SOCKMAP = 14, + BPF_LINK_TYPE_TRACING_MULTI = 15, __MAX_BPF_LINK_TYPE, }; @@ -1207,6 +1250,7 @@ enum bpf_perf_event_type { #define BPF_F_BEFORE (1U << 3) #define BPF_F_AFTER (1U << 4) #define BPF_F_ID (1U << 5) +#define BPF_F_PREORDER (1U << 6) #define BPF_F_LINK BPF_F_LINK /* 1 << 13 */ /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the @@ -1283,7 +1327,11 @@ enum { * BPF_TRACE_UPROBE_MULTI attach type to create return probe. */ enum { - BPF_F_UPROBE_MULTI_RETURN = (1U << 0) + /* Get return uprobe. */ + BPF_F_UPROBE_MULTI_RETURN = (1U << 0), + + /* Get path from provided path_fd. */ + BPF_F_UPROBE_MULTI_PATH_FD = (1U << 1), }; /* link_create.netfilter.flags used in LINK_CREATE command for @@ -1355,6 +1403,8 @@ enum { BPF_NOEXIST = 1, /* create new element if it didn't exist */ BPF_EXIST = 2, /* update existing element */ BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ + BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */ + BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */ }; /* flags for BPF_MAP_CREATE command */ @@ -1413,6 +1463,9 @@ enum { /* Do not translate kernel bpf_arena pointers to user pointers */ BPF_F_NO_USER_CONV = (1U << 18), + +/* Enable BPF ringbuf overwrite mode */ + BPF_F_RB_OVERWRITE = (1U << 19), }; /* Flags for BPF_PROG_QUERY. */ @@ -1457,8 +1510,20 @@ struct bpf_stack_build_id { }; }; +struct bpf_common_attr { + __aligned_u64 log_buf; + __u32 log_size; + __u32 log_level; + __u32 log_true_size; +}; + #define BPF_OBJ_NAME_LEN 16U +enum { + BPF_STREAM_STDOUT = 1, + BPF_STREAM_STDERR = 2, +}; + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -1489,6 +1554,11 @@ union bpf_attr { * * BPF_MAP_TYPE_ARENA - contains the address where user space * is going to mmap() the arena. It has to be page aligned. + * + * BPF_MAP_TYPE_RHASH - initial table size hint + * (nelem_hint). 0 = use rhashtable default. Must be + * <= min(max_entries, U16_MAX). Upper 32 bits reserved, + * must be zero. */ __u64 map_extra; @@ -1500,9 +1570,15 @@ union bpf_attr { * If provided, map_flags should have BPF_F_TOKEN_FD flag set. */ __s32 map_token_fd; + + /* Hash of the program that has exclusive access to the map. + */ + __aligned_u64 excl_prog_hash; + /* Size of the passed excl_prog_hash. */ + __u32 excl_prog_hash_size; }; - struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ __u32 map_fd; __aligned_u64 key; union { @@ -1583,6 +1659,16 @@ union bpf_attr { * continuous. */ __u32 fd_array_cnt; + /* Pointer to a buffer containing the signature of the BPF + * program. + */ + __aligned_u64 signature; + /* Size of the signature buffer in bytes. */ + __u32 signature_size; + /* ID of the kernel keyring to be used for signature + * verification. + */ + __s32 keyring_id; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1648,6 +1734,7 @@ union bpf_attr { }; __u32 next_id; __u32 open_flags; + __s32 fd_by_id_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ @@ -1781,6 +1868,7 @@ union bpf_attr { __u32 cnt; __u32 flags; __u32 pid; + __u32 path_fd; } uprobe_multi; struct { union { @@ -1789,6 +1877,18 @@ union bpf_attr { }; __u64 expected_revision; } netkit; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } cgroup; + struct { + __aligned_u64 ids; + __aligned_u64 cookies; + __u32 cnt; + } tracing_multi; }; } link_create; @@ -1837,6 +1937,19 @@ union bpf_attr { __u32 bpffs_fd; } token_create; + struct { + __aligned_u64 stream_buf; + __u32 stream_buf_len; + __u32 stream_id; + __u32 prog_fd; + } prog_stream_read; + + struct { + __u32 map_fd; + __u32 prog_fd; + __u32 flags; + } prog_assoc_struct_ops; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF @@ -1990,11 +2103,15 @@ union bpf_attr { * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. *flags* are a combination of - * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the - * checksum for the packet after storing the bytes) and - * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ - * **->swhash** and *skb*\ **->l4hash** to 0). + * associated to *skb*, at *offset*. The *flags* are a combination + * of the following values: + * + * **BPF_F_RECOMPUTE_CSUM** + * Automatically update *skb*\ **->csum** after storing the + * bytes. + * **BPF_F_INVALIDATE_HASH** + * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\ + * **->l4hash** to 0. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers @@ -2046,7 +2163,8 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. + * Flag **BPF_F_IPV6** should be set for IPv6 packets. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -2393,7 +2511,7 @@ union bpf_attr { * into it. An example is available in file * *samples/bpf/trace_output_user.c* in the Linux kernel source * tree (the eBPF program counterpart is in - * *samples/bpf/trace_output_kern.c*). + * *samples/bpf/trace_output.bpf.c*). * * **bpf_perf_event_output**\ () achieves better performance * than **bpf_trace_printk**\ () for sharing data with user @@ -4555,7 +4673,9 @@ union bpf_attr { * Description * Discard reserved ring buffer sample, pointed to by *data*. * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification - * of new data availability is sent. + * of new data availability is sent. Discarded records remain in + * the ring buffer until consumed by user space, so a later submit + * using adaptive wakeup might not wake up the consumer. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. * If **0** is specified in *flags*, an adaptive notification @@ -4833,7 +4953,7 @@ union bpf_attr { * * **-ENOENT** if the bpf_local_storage cannot be found. * - * long bpf_d_path(struct path *path, char *buf, u32 sz) + * long bpf_d_path(const struct path *path, char *buf, u32 sz) * Description * Return full path for given **struct path** object, which * needs to be the kernel BTF *path* object. The path is @@ -4963,6 +5083,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The @@ -5557,7 +5680,7 @@ union bpf_attr { * Return * *sk* if casting is valid, or **NULL** otherwise. * - * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr) * Description * Get a dynptr to local memory *data*. * @@ -5600,7 +5723,7 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) + * long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. @@ -5610,7 +5733,7 @@ union bpf_attr { * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if * *flags* is not 0. * - * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. @@ -5631,7 +5754,7 @@ union bpf_attr { * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs, * other errors correspond to errors returned by **bpf_skb_store_bytes**\ (). * - * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len) * Description * Get a pointer to the underlying dynptr data. * @@ -6019,7 +6142,10 @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ - /* */ + /* This helper list is effectively frozen. If you are trying to \ + * add a new helper, you should add a kfunc instead which has \ + * less stability guarantees. See Documentation/bpf/kfuncs.rst \ + */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't * know or care about integer value that is now passed as second argument @@ -6057,6 +6183,7 @@ enum { BPF_F_PSEUDO_HDR = (1ULL << 4), BPF_F_MARK_MANGLED_0 = (1ULL << 5), BPF_F_MARK_ENFORCE = (1ULL << 6), + BPF_F_IPV6 = (1ULL << 7), }; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ @@ -6166,6 +6293,7 @@ enum { BPF_RB_RING_SIZE = 1, BPF_RB_CONS_POS = 2, BPF_RB_PROD_POS = 3, + BPF_RB_OVERWRITE_POS = 4, }; /* BPF ring buffer constants */ @@ -6598,6 +6726,7 @@ struct bpf_prog_info { __u32 verified_insns; __u32 attach_btf_obj_id; __u32 attach_btf_id; + __u32 :32; } __attribute__((aligned(8))); struct bpf_map_info { @@ -6617,6 +6746,9 @@ struct bpf_map_info { __u32 btf_value_type_id; __u32 btf_vmlinux_id; __u64 map_extra; + __aligned_u64 hash; + __u32 hash_size; + __u32 :32; } __attribute__((aligned(8))); struct bpf_btf_info { @@ -6636,11 +6768,15 @@ struct bpf_link_info { struct { __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */ __u32 tp_name_len; /* in/out: tp_name buffer len */ + __u32 :32; + __u64 cookie; } raw_tracepoint; struct { __u32 attach_type; __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */ __u32 target_btf_id; /* BTF type id inside the object */ + __u32 :32; + __u64 cookie; } tracing; struct { __u64 cgroup_id; @@ -6712,6 +6848,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from file_name */ __u64 cookie; + __u64 ref_ctr_offset; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ @@ -6750,6 +6887,13 @@ struct bpf_link_info { }; } __attribute__((aligned(8))); +struct bpf_token_info { + __u64 allowed_cmds; + __u64 allowed_maps; + __u64 allowed_progs; + __u64 allowed_attachs; +} __attribute__((aligned(8))); + /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on * attach type). @@ -6913,6 +7057,12 @@ enum { BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, }; +enum { + SK_BPF_CB_TX_TIMESTAMPING = 1<<0, + SK_BPF_CB_MASK = (SK_BPF_CB_TX_TIMESTAMPING - 1) | + SK_BPF_CB_TX_TIMESTAMPING +}; + /* List of known BPF sock_ops operators. * New entries can only be added at the end */ @@ -7025,6 +7175,29 @@ enum { * by the kernel or the * earlier bpf-progs. */ + BPF_SOCK_OPS_TSTAMP_SCHED_CB, /* Called when skb is passing + * through dev layer when + * SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ + BPF_SOCK_OPS_TSTAMP_SND_SW_CB, /* Called when skb is about to send + * to the nic when SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ + BPF_SOCK_OPS_TSTAMP_SND_HW_CB, /* Called in hardware phase when + * SK_BPF_CB_TX_TIMESTAMPING feature + * is on. + */ + BPF_SOCK_OPS_TSTAMP_ACK_CB, /* Called when all the skbs in the + * same sendmsg call are acked + * when SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ + BPF_SOCK_OPS_TSTAMP_SENDMSG_CB, /* Called when every sendmsg syscall + * is triggered. It's used to correlate + * sendmsg timestamp with corresponding + * tskey. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect @@ -7091,6 +7264,9 @@ enum { TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ + SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */ + SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */ + }; enum { @@ -7327,6 +7503,10 @@ struct bpf_timer { __u64 __opaque[2]; } __attribute__((aligned(8))); +struct bpf_task_work { + __u64 __opaque; +} __attribute__((aligned(8))); + struct bpf_wq { __u64 __opaque[2]; } __attribute__((aligned(8))); @@ -7532,4 +7712,24 @@ enum bpf_kfunc_flags { BPF_F_PAD_ZEROS = (1ULL << 0), }; +/* + * Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type. + * + * Before the map is used the orig_off field should point to an + * instruction inside the program being loaded. The other fields + * must be set to 0. + * + * After the program is loaded, the xlated_off will be adjusted + * by the verifier to point to the index of the original instruction + * in the xlated program. If the instruction is deleted, it will + * be set to (u32)-1. The jitted_off will be set to the corresponding + * offset in the jitted image of the program. + */ +struct bpf_insn_array_value { + __u32 orig_off; + __u32 xlated_off; + __u32 jitted_off; + __u32 :32; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index ec1798b6d3ff..618167cab4e6 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -8,6 +8,16 @@ #define BTF_MAGIC 0xeB9F #define BTF_VERSION 1 +/* + * BTF layout section consists of a struct btf_layout for each known + * kind at BTF encoding time. + */ +struct btf_layout { + __u8 info_sz; /* size of singular element after btf_type */ + __u8 elem_sz; /* size of each of btf_vlen(t) elements */ + __u16 flags; /* currently unused */ +}; + struct btf_header { __u16 magic; __u8 version; @@ -19,24 +29,29 @@ struct btf_header { __u32 type_len; /* length of type section */ __u32 str_off; /* offset of string section */ __u32 str_len; /* length of string section */ + __u32 layout_off; /* offset of layout section */ + __u32 layout_len; /* length of layout section */ }; -/* Max # of type identifier */ -#define BTF_MAX_TYPE 0x000fffff -/* Max offset into the string section */ -#define BTF_MAX_NAME_OFFSET 0x00ffffff -/* Max # of struct/union/enum members or func args */ -#define BTF_MAX_VLEN 0xffff +enum btf_max { + /* Max possible kind */ + BTF_MAX_KIND = 0x0000007f, + /* Max # of type identifier */ + BTF_MAX_TYPE = 0x000fffff, + /* Max offset into the string section */ + BTF_MAX_NAME_OFFSET = 0x00ffffff, + /* Max # of struct/union/enum members or func args */ + BTF_MAX_VLEN = 0x00ffffff, +}; struct btf_type { __u32 name_off; /* "info" bits arrangement - * bits 0-15: vlen (e.g. # of struct's members) - * bits 16-23: unused - * bits 24-28: kind (e.g. int, ptr, array...etc) - * bits 29-30: unused + * bits 0-23: vlen (e.g. # of struct's members) + * bits 24-30: kind (e.g. int, ptr, array...etc) * bit 31: kind_flag, currently used by - * struct, union, enum, fwd and enum64 + * struct, union, enum, fwd, enum64, + * decl_tag and type_tag */ __u32 info; /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64. @@ -52,8 +67,8 @@ struct btf_type { }; }; -#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f) -#define BTF_INFO_VLEN(info) ((info) & 0xffff) +#define BTF_INFO_KIND(info) (((info) >> 24) & 0x7f) +#define BTF_INFO_VLEN(info) ((info) & 0xffffff) #define BTF_INFO_KFLAG(info) ((info) >> 31) enum { diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h index e16be0d37746..b8f629ef135f 100644 --- a/tools/include/uapi/linux/const.h +++ b/tools/include/uapi/linux/const.h @@ -33,7 +33,7 @@ * Missing asm support * * __BIT128() would not work in the asm code, as it shifts an - * 'unsigned __init128' data type as direct representation of + * 'unsigned __int128' data type as direct representation of * 128 bit constants is not supported in the gcc compiler, as * they get silently truncated. * diff --git a/tools/include/uapi/linux/coredump.h b/tools/include/uapi/linux/coredump.h new file mode 100644 index 000000000000..dc3789b78af0 --- /dev/null +++ b/tools/include/uapi/linux/coredump.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_COREDUMP_H +#define _UAPI_LINUX_COREDUMP_H + +#include <linux/types.h> + +/** + * coredump_{req,ack} flags + * @COREDUMP_KERNEL: kernel writes coredump + * @COREDUMP_USERSPACE: userspace writes coredump + * @COREDUMP_REJECT: don't generate coredump + * @COREDUMP_WAIT: wait for coredump server + */ +enum { + COREDUMP_KERNEL = (1ULL << 0), + COREDUMP_USERSPACE = (1ULL << 1), + COREDUMP_REJECT = (1ULL << 2), + COREDUMP_WAIT = (1ULL << 3), +}; + +/** + * struct coredump_req - message kernel sends to userspace + * @size: size of struct coredump_req + * @size_ack: known size of struct coredump_ack on this kernel + * @mask: supported features + * + * When a coredump happens the kernel will connect to the coredump + * socket and send a coredump request to the coredump server. The @size + * member is set to the size of struct coredump_req and provides a hint + * to userspace how much data can be read. Userspace may use MSG_PEEK to + * peek the size of struct coredump_req and then choose to consume it in + * one go. Userspace may also simply read a COREDUMP_ACK_SIZE_VER0 + * request. If the size the kernel sends is larger userspace simply + * discards any remaining data. + * + * The coredump_req->mask member is set to the currently know features. + * Userspace may only set coredump_ack->mask to the bits raised by the + * kernel in coredump_req->mask. + * + * The coredump_req->size_ack member is set by the kernel to the size of + * struct coredump_ack the kernel knows. Userspace may only send up to + * coredump_req->size_ack bytes to the kernel and must set + * coredump_ack->size accordingly. + */ +struct coredump_req { + __u32 size; + __u32 size_ack; + __u64 mask; +}; + +enum { + COREDUMP_REQ_SIZE_VER0 = 16U, /* size of first published struct */ +}; + +/** + * struct coredump_ack - message userspace sends to kernel + * @size: size of the struct + * @spare: unused + * @mask: features kernel is supposed to use + * + * The @size member must be set to the size of struct coredump_ack. It + * may never exceed what the kernel returned in coredump_req->size_ack + * but it may of course be smaller (>= COREDUMP_ACK_SIZE_VER0 and <= + * coredump_req->size_ack). + * + * The @mask member must be set to the features the coredump server + * wants the kernel to use. Only bits the kernel returned in + * coredump_req->mask may be set. + */ +struct coredump_ack { + __u32 size; + __u32 spare; + __u64 mask; +}; + +enum { + COREDUMP_ACK_SIZE_VER0 = 16U, /* size of first published struct */ +}; + +/** + * enum coredump_mark - Markers for the coredump socket + * + * The kernel will place a single byte on the coredump socket. The + * markers notify userspace whether the coredump ack succeeded or + * failed. + * + * @COREDUMP_MARK_MINSIZE: the provided coredump_ack size was too small + * @COREDUMP_MARK_MAXSIZE: the provided coredump_ack size was too big + * @COREDUMP_MARK_UNSUPPORTED: the provided coredump_ack mask was invalid + * @COREDUMP_MARK_CONFLICTING: the provided coredump_ack mask has conflicting options + * @COREDUMP_MARK_REQACK: the coredump request and ack was successful + * @__COREDUMP_MARK_MAX: the maximum coredump mark value + */ +enum coredump_mark { + COREDUMP_MARK_REQACK = 0U, + COREDUMP_MARK_MINSIZE = 1U, + COREDUMP_MARK_MAXSIZE = 2U, + COREDUMP_MARK_UNSUPPORTED = 3U, + COREDUMP_MARK_CONFLICTING = 4U, + __COREDUMP_MARK_MAX = (1U << 31), +}; + +#endif /* _UAPI_LINUX_COREDUMP_H */ diff --git a/tools/include/uapi/linux/elf.h b/tools/include/uapi/linux/elf.h new file mode 100644 index 000000000000..5834b83d7f9a --- /dev/null +++ b/tools/include/uapi/linux/elf.h @@ -0,0 +1,524 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_ELF_H +#define _LINUX_ELF_H + +#include <linux/types.h> +#include <linux/elf-em.h> + +/* 32-bit ELF base types. */ +typedef __u32 Elf32_Addr; +typedef __u16 Elf32_Half; +typedef __u32 Elf32_Off; +typedef __s32 Elf32_Sword; +typedef __u32 Elf32_Word; +typedef __u16 Elf32_Versym; + +/* 64-bit ELF base types. */ +typedef __u64 Elf64_Addr; +typedef __u16 Elf64_Half; +typedef __s16 Elf64_SHalf; +typedef __u64 Elf64_Off; +typedef __s32 Elf64_Sword; +typedef __u32 Elf64_Word; +typedef __u64 Elf64_Xword; +typedef __s64 Elf64_Sxword; +typedef __u16 Elf64_Versym; + +/* These constants are for the segment types stored in the image headers */ +#define PT_NULL 0 +#define PT_LOAD 1 +#define PT_DYNAMIC 2 +#define PT_INTERP 3 +#define PT_NOTE 4 +#define PT_SHLIB 5 +#define PT_PHDR 6 +#define PT_TLS 7 /* Thread local storage segment */ +#define PT_LOOS 0x60000000 /* OS-specific */ +#define PT_HIOS 0x6fffffff /* OS-specific */ +#define PT_LOPROC 0x70000000 +#define PT_HIPROC 0x7fffffff +#define PT_GNU_EH_FRAME (PT_LOOS + 0x474e550) +#define PT_GNU_STACK (PT_LOOS + 0x474e551) +#define PT_GNU_RELRO (PT_LOOS + 0x474e552) +#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553) + + +/* ARM MTE memory tag segment type */ +#define PT_AARCH64_MEMTAG_MTE (PT_LOPROC + 0x2) + +/* + * Extended Numbering + * + * If the real number of program header table entries is larger than + * or equal to PN_XNUM(0xffff), it is set to sh_info field of the + * section header at index 0, and PN_XNUM is set to e_phnum + * field. Otherwise, the section header at index 0 is zero + * initialized, if it exists. + * + * Specifications are available in: + * + * - Oracle: Linker and Libraries. + * Part No: 817–1984–19, August 2011. + * https://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf + * + * - System V ABI AMD64 Architecture Processor Supplement + * Draft Version 0.99.4, + * January 13, 2010. + * http://www.cs.washington.edu/education/courses/cse351/12wi/supp-docs/abi.pdf + */ +#define PN_XNUM 0xffff + +/* These constants define the different elf file types */ +#define ET_NONE 0 +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 +#define ET_LOPROC 0xff00 +#define ET_HIPROC 0xffff + +/* This is the info that is needed to parse the dynamic section of the file */ +#define DT_NULL 0 +#define DT_NEEDED 1 +#define DT_PLTRELSZ 2 +#define DT_PLTGOT 3 +#define DT_HASH 4 +#define DT_STRTAB 5 +#define DT_SYMTAB 6 +#define DT_RELA 7 +#define DT_RELASZ 8 +#define DT_RELAENT 9 +#define DT_STRSZ 10 +#define DT_SYMENT 11 +#define DT_INIT 12 +#define DT_FINI 13 +#define DT_SONAME 14 +#define DT_RPATH 15 +#define DT_SYMBOLIC 16 +#define DT_REL 17 +#define DT_RELSZ 18 +#define DT_RELENT 19 +#define DT_PLTREL 20 +#define DT_DEBUG 21 +#define DT_TEXTREL 22 +#define DT_JMPREL 23 +#define DT_ENCODING 32 +#define OLD_DT_LOOS 0x60000000 +#define DT_LOOS 0x6000000d +#define DT_HIOS 0x6ffff000 +#define DT_VALRNGLO 0x6ffffd00 +#define DT_VALRNGHI 0x6ffffdff +#define DT_ADDRRNGLO 0x6ffffe00 +#define DT_GNU_HASH 0x6ffffef5 +#define DT_ADDRRNGHI 0x6ffffeff +#define DT_VERSYM 0x6ffffff0 +#define DT_RELACOUNT 0x6ffffff9 +#define DT_RELCOUNT 0x6ffffffa +#define DT_FLAGS_1 0x6ffffffb +#define DT_VERDEF 0x6ffffffc +#define DT_VERDEFNUM 0x6ffffffd +#define DT_VERNEED 0x6ffffffe +#define DT_VERNEEDNUM 0x6fffffff +#define OLD_DT_HIOS 0x6fffffff +#define DT_LOPROC 0x70000000 +#define DT_HIPROC 0x7fffffff + +/* This info is needed when parsing the symbol table */ +#define STB_LOCAL 0 +#define STB_GLOBAL 1 +#define STB_WEAK 2 + +#define STN_UNDEF 0 + +#define STT_NOTYPE 0 +#define STT_OBJECT 1 +#define STT_FUNC 2 +#define STT_SECTION 3 +#define STT_FILE 4 +#define STT_COMMON 5 +#define STT_TLS 6 + +#define VER_FLG_BASE 0x1 +#define VER_FLG_WEAK 0x2 + +#define ELF_ST_BIND(x) ((x) >> 4) +#define ELF_ST_TYPE(x) ((x) & 0xf) +#define ELF32_ST_BIND(x) ELF_ST_BIND(x) +#define ELF32_ST_TYPE(x) ELF_ST_TYPE(x) +#define ELF64_ST_BIND(x) ELF_ST_BIND(x) +#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) + +typedef struct { + Elf32_Sword d_tag; + union { + Elf32_Sword d_val; + Elf32_Addr d_ptr; + } d_un; +} Elf32_Dyn; + +typedef struct { + Elf64_Sxword d_tag; /* entry tag value */ + union { + Elf64_Xword d_val; + Elf64_Addr d_ptr; + } d_un; +} Elf64_Dyn; + +/* The following are used with relocations */ +#define ELF32_R_SYM(x) ((x) >> 8) +#define ELF32_R_TYPE(x) ((x) & 0xff) + +#define ELF64_R_SYM(i) ((i) >> 32) +#define ELF64_R_TYPE(i) ((i) & 0xffffffff) + +typedef struct elf32_rel { + Elf32_Addr r_offset; + Elf32_Word r_info; +} Elf32_Rel; + +typedef struct elf64_rel { + Elf64_Addr r_offset; /* Location at which to apply the action */ + Elf64_Xword r_info; /* index and type of relocation */ +} Elf64_Rel; + +typedef struct elf32_rela { + Elf32_Addr r_offset; + Elf32_Word r_info; + Elf32_Sword r_addend; +} Elf32_Rela; + +typedef struct elf64_rela { + Elf64_Addr r_offset; /* Location at which to apply the action */ + Elf64_Xword r_info; /* index and type of relocation */ + Elf64_Sxword r_addend; /* Constant addend used to compute value */ +} Elf64_Rela; + +typedef struct elf32_sym { + Elf32_Word st_name; + Elf32_Addr st_value; + Elf32_Word st_size; + unsigned char st_info; + unsigned char st_other; + Elf32_Half st_shndx; +} Elf32_Sym; + +typedef struct elf64_sym { + Elf64_Word st_name; /* Symbol name, index in string tbl */ + unsigned char st_info; /* Type and binding attributes */ + unsigned char st_other; /* No defined meaning, 0 */ + Elf64_Half st_shndx; /* Associated section index */ + Elf64_Addr st_value; /* Value of the symbol */ + Elf64_Xword st_size; /* Associated symbol size */ +} Elf64_Sym; + + +#define EI_NIDENT 16 + +typedef struct elf32_hdr { + unsigned char e_ident[EI_NIDENT]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; /* Entry point */ + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +} Elf32_Ehdr; + +typedef struct elf64_hdr { + unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */ + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; /* Entry point virtual address */ + Elf64_Off e_phoff; /* Program header table file offset */ + Elf64_Off e_shoff; /* Section header table file offset */ + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; + +/* These constants define the permissions on sections in the program + header, p_flags. */ +#define PF_R 0x4 +#define PF_W 0x2 +#define PF_X 0x1 + +typedef struct elf32_phdr { + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} Elf32_Phdr; + +typedef struct elf64_phdr { + Elf64_Word p_type; + Elf64_Word p_flags; + Elf64_Off p_offset; /* Segment file offset */ + Elf64_Addr p_vaddr; /* Segment virtual address */ + Elf64_Addr p_paddr; /* Segment physical address */ + Elf64_Xword p_filesz; /* Segment size in file */ + Elf64_Xword p_memsz; /* Segment size in memory */ + Elf64_Xword p_align; /* Segment alignment, file & memory */ +} Elf64_Phdr; + +/* sh_type */ +#define SHT_NULL 0 +#define SHT_PROGBITS 1 +#define SHT_SYMTAB 2 +#define SHT_STRTAB 3 +#define SHT_RELA 4 +#define SHT_HASH 5 +#define SHT_DYNAMIC 6 +#define SHT_NOTE 7 +#define SHT_NOBITS 8 +#define SHT_REL 9 +#define SHT_SHLIB 10 +#define SHT_DYNSYM 11 +#define SHT_NUM 12 +#define SHT_LOPROC 0x70000000 +#define SHT_HIPROC 0x7fffffff +#define SHT_LOUSER 0x80000000 +#define SHT_HIUSER 0xffffffff + +/* sh_flags */ +#define SHF_WRITE 0x1 +#define SHF_ALLOC 0x2 +#define SHF_EXECINSTR 0x4 +#define SHF_RELA_LIVEPATCH 0x00100000 +#define SHF_RO_AFTER_INIT 0x00200000 +#define SHF_MASKPROC 0xf0000000 + +/* special section indexes */ +#define SHN_UNDEF 0 +#define SHN_LORESERVE 0xff00 +#define SHN_LOPROC 0xff00 +#define SHN_HIPROC 0xff1f +#define SHN_LIVEPATCH 0xff20 +#define SHN_ABS 0xfff1 +#define SHN_COMMON 0xfff2 +#define SHN_HIRESERVE 0xffff + +typedef struct elf32_shdr { + Elf32_Word sh_name; + Elf32_Word sh_type; + Elf32_Word sh_flags; + Elf32_Addr sh_addr; + Elf32_Off sh_offset; + Elf32_Word sh_size; + Elf32_Word sh_link; + Elf32_Word sh_info; + Elf32_Word sh_addralign; + Elf32_Word sh_entsize; +} Elf32_Shdr; + +typedef struct elf64_shdr { + Elf64_Word sh_name; /* Section name, index in string tbl */ + Elf64_Word sh_type; /* Type of section */ + Elf64_Xword sh_flags; /* Miscellaneous section attributes */ + Elf64_Addr sh_addr; /* Section virtual addr at execution */ + Elf64_Off sh_offset; /* Section file offset */ + Elf64_Xword sh_size; /* Size of section in bytes */ + Elf64_Word sh_link; /* Index of another section */ + Elf64_Word sh_info; /* Additional section information */ + Elf64_Xword sh_addralign; /* Section alignment */ + Elf64_Xword sh_entsize; /* Entry size if section holds table */ +} Elf64_Shdr; + +#define EI_MAG0 0 /* e_ident[] indexes */ +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_OSABI 7 +#define EI_PAD 8 + +#define ELFMAG0 0x7f /* EI_MAG */ +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' +#define ELFMAG "\177ELF" +#define SELFMAG 4 + +#define ELFCLASSNONE 0 /* EI_CLASS */ +#define ELFCLASS32 1 +#define ELFCLASS64 2 +#define ELFCLASSNUM 3 + +#define ELFDATANONE 0 /* e_ident[EI_DATA] */ +#define ELFDATA2LSB 1 +#define ELFDATA2MSB 2 + +#define EV_NONE 0 /* e_version, EI_VERSION */ +#define EV_CURRENT 1 +#define EV_NUM 2 + +#define ELFOSABI_NONE 0 +#define ELFOSABI_LINUX 3 + +#ifndef ELF_OSABI +#define ELF_OSABI ELFOSABI_NONE +#endif + +/* + * Notes used in ET_CORE. Architectures export some of the arch register sets + * using the corresponding note types via the PTRACE_GETREGSET and + * PTRACE_SETREGSET requests. + * The note name for these types is "LINUX", except NT_PRFPREG that is named + * "CORE". + */ +#define NT_PRSTATUS 1 +#define NT_PRFPREG 2 +#define NT_PRPSINFO 3 +#define NT_TASKSTRUCT 4 +#define NT_AUXV 6 +/* + * Note to userspace developers: size of NT_SIGINFO note may increase + * in the future to accomodate more fields, don't assume it is fixed! + */ +#define NT_SIGINFO 0x53494749 +#define NT_FILE 0x46494c45 +#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ +#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ +#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */ +#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ +#define NT_PPC_TAR 0x103 /* Target Address Register */ +#define NT_PPC_PPR 0x104 /* Program Priority Register */ +#define NT_PPC_DSCR 0x105 /* Data Stream Control Register */ +#define NT_PPC_EBB 0x106 /* Event Based Branch Registers */ +#define NT_PPC_PMU 0x107 /* Performance Monitor Registers */ +#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */ +#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */ +#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */ +#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */ +#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */ +#define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */ +#define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */ +#define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */ +#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */ +#define NT_PPC_DEXCR 0x111 /* PowerPC DEXCR registers */ +#define NT_PPC_HASHKEYR 0x112 /* PowerPC HASHKEYR register */ +#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ +#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ +#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ +/* Old binutils treats 0x203 as a CET state */ +#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */ +#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */ +#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ +#define NT_S390_TIMER 0x301 /* s390 timer register */ +#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */ +#define NT_S390_TODPREG 0x303 /* s390 TOD programmable register */ +#define NT_S390_CTRS 0x304 /* s390 control registers */ +#define NT_S390_PREFIX 0x305 /* s390 prefix register */ +#define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */ +#define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */ +#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ +#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ +#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ +#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ +#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */ +#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */ +#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */ +#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ +#define NT_ARM_TLS 0x401 /* ARM TLS register */ +#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ +#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ +#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */ +#define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */ +#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */ +#define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ +#define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ +#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ +#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ +#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ +#define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ +#define NT_ARM_FPMR 0x40e /* ARM floating point mode register */ +#define NT_ARM_POE 0x40f /* ARM POE registers */ +#define NT_ARM_GCS 0x410 /* ARM GCS state */ +#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ +#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ +#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ +#define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ +#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ +#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */ +#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */ +#define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged address control (prctl()) */ +#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ +#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */ +#define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */ +#define NT_LOONGARCH_LASX 0xa03 /* LoongArch Loongson Advanced SIMD Extension registers */ +#define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary Translation registers */ +#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */ +#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */ + +/* Note types with note name "GNU" */ +#define NT_GNU_PROPERTY_TYPE_0 5 + +/* Note header in a PT_NOTE section */ +typedef struct elf32_note { + Elf32_Word n_namesz; /* Name size */ + Elf32_Word n_descsz; /* Content size */ + Elf32_Word n_type; /* Content type */ +} Elf32_Nhdr; + +/* Note header in a PT_NOTE section */ +typedef struct elf64_note { + Elf64_Word n_namesz; /* Name size */ + Elf64_Word n_descsz; /* Content size */ + Elf64_Word n_type; /* Content type */ +} Elf64_Nhdr; + +/* .note.gnu.property types for EM_AARCH64: */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) + +typedef struct { + Elf32_Half vd_version; + Elf32_Half vd_flags; + Elf32_Half vd_ndx; + Elf32_Half vd_cnt; + Elf32_Word vd_hash; + Elf32_Word vd_aux; + Elf32_Word vd_next; +} Elf32_Verdef; + +typedef struct { + Elf64_Half vd_version; + Elf64_Half vd_flags; + Elf64_Half vd_ndx; + Elf64_Half vd_cnt; + Elf64_Word vd_hash; + Elf64_Word vd_aux; + Elf64_Word vd_next; +} Elf64_Verdef; + +typedef struct { + Elf32_Word vda_name; + Elf32_Word vda_next; +} Elf32_Verdaux; + +typedef struct { + Elf64_Word vda_name; + Elf64_Word vda_next; +} Elf64_Verdaux; + +#endif /* _LINUX_ELF_H */ diff --git a/tools/include/uapi/linux/fadvise.h b/tools/include/uapi/linux/fadvise.h deleted file mode 100644 index 0862b87434c2..000000000000 --- a/tools/include/uapi/linux/fadvise.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef FADVISE_H_INCLUDED -#define FADVISE_H_INCLUDED - -#define POSIX_FADV_NORMAL 0 /* No further special treatment. */ -#define POSIX_FADV_RANDOM 1 /* Expect random page references. */ -#define POSIX_FADV_SEQUENTIAL 2 /* Expect sequential page references. */ -#define POSIX_FADV_WILLNEED 3 /* Will need these pages. */ - -/* - * The advise values for POSIX_FADV_DONTNEED and POSIX_ADV_NOREUSE - * for s390-64 differ from the values for the rest of the world. - */ -#if defined(__s390x__) -#define POSIX_FADV_DONTNEED 6 /* Don't need these pages. */ -#define POSIX_FADV_NOREUSE 7 /* Data will be accessed once. */ -#else -#define POSIX_FADV_DONTNEED 4 /* Don't need these pages. */ -#define POSIX_FADV_NOREUSE 5 /* Data will be accessed once. */ -#endif - -#endif /* FADVISE_H_INCLUDED */ diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h new file mode 100644 index 000000000000..e710967c7c26 --- /dev/null +++ b/tools/include/uapi/linux/fanotify.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_FANOTIFY_H +#define _UAPI_LINUX_FANOTIFY_H + +#include <linux/types.h> + +/* the following events that user-space can register for */ +#define FAN_ACCESS 0x00000001 /* File was accessed */ +#define FAN_MODIFY 0x00000002 /* File was modified */ +#define FAN_ATTRIB 0x00000004 /* Metadata changed */ +#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ +#define FAN_OPEN 0x00000020 /* File was opened */ +#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FAN_CREATE 0x00000100 /* Subfile was created */ +#define FAN_DELETE 0x00000200 /* Subfile was deleted */ +#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FAN_MOVE_SELF 0x00000800 /* Self was moved */ +#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ + +#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FAN_FS_ERROR 0x00008000 /* Filesystem error */ + +#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ +#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ + +#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */ +#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ +#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ + +#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ + +#define FAN_RENAME 0x10000000 /* File was renamed */ + +#define FAN_ONDIR 0x40000000 /* Event occurred against dir */ + +/* helper events */ +#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ +#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */ + +/* flags used for fanotify_init() */ +#define FAN_CLOEXEC 0x00000001 +#define FAN_NONBLOCK 0x00000002 + +/* These are NOT bitwise flags. Both bits are used together. */ +#define FAN_CLASS_NOTIF 0x00000000 +#define FAN_CLASS_CONTENT 0x00000004 +#define FAN_CLASS_PRE_CONTENT 0x00000008 + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FAN_UNLIMITED_QUEUE 0x00000010 +#define FAN_UNLIMITED_MARKS 0x00000020 +#define FAN_ENABLE_AUDIT 0x00000040 + +/* Flags to determine fanotify event format */ +#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */ +#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ +#define FAN_REPORT_FID 0x00000200 /* Report unique file id */ +#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ +#define FAN_REPORT_NAME 0x00000800 /* Report events with name */ +#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ +#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ +#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ + +/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ +#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) +/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */ +#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \ + FAN_REPORT_FID | FAN_REPORT_TARGET_FID) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ + FAN_UNLIMITED_MARKS) + +/* flags used for fanotify_modify_mark() */ +#define FAN_MARK_ADD 0x00000001 +#define FAN_MARK_REMOVE 0x00000002 +#define FAN_MARK_DONT_FOLLOW 0x00000004 +#define FAN_MARK_ONLYDIR 0x00000008 +/* FAN_MARK_MOUNT is 0x00000010 */ +#define FAN_MARK_IGNORED_MASK 0x00000020 +#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 +#define FAN_MARK_FLUSH 0x00000080 +/* FAN_MARK_FILESYSTEM is 0x00000100 */ +#define FAN_MARK_EVICTABLE 0x00000200 +/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */ +#define FAN_MARK_IGNORE 0x00000400 + +/* These are NOT bitwise flags. Both bits can be used togther. */ +#define FAN_MARK_INODE 0x00000000 +#define FAN_MARK_MOUNT 0x00000010 +#define FAN_MARK_FILESYSTEM 0x00000100 +#define FAN_MARK_MNTNS 0x00000110 + +/* + * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY + * for non-inode mark types. + */ +#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ + FAN_MARK_REMOVE |\ + FAN_MARK_DONT_FOLLOW |\ + FAN_MARK_ONLYDIR |\ + FAN_MARK_MOUNT |\ + FAN_MARK_IGNORED_MASK |\ + FAN_MARK_IGNORED_SURV_MODIFY |\ + FAN_MARK_FLUSH) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_EVENTS (FAN_ACCESS |\ + FAN_MODIFY |\ + FAN_CLOSE |\ + FAN_OPEN) + +/* + * All events which require a permission response from userspace + */ +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\ + FAN_ACCESS_PERM) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ + FAN_ALL_PERM_EVENTS |\ + FAN_Q_OVERFLOW) + +#define FANOTIFY_METADATA_VERSION 3 + +struct fanotify_event_metadata { + __u32 event_len; + __u8 vers; + __u8 reserved; + __u16 metadata_len; + __aligned_u64 mask; + __s32 fd; + __s32 pid; +}; + +#define FAN_EVENT_INFO_TYPE_FID 1 +#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 +#define FAN_EVENT_INFO_TYPE_DFID 3 +#define FAN_EVENT_INFO_TYPE_PIDFD 4 +#define FAN_EVENT_INFO_TYPE_ERROR 5 +#define FAN_EVENT_INFO_TYPE_RANGE 6 +#define FAN_EVENT_INFO_TYPE_MNT 7 + +/* Special info types for FAN_RENAME */ +#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 +/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */ +#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12 +/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */ + +/* Variable length info record following event metadata */ +struct fanotify_event_info_header { + __u8 info_type; + __u8 pad; + __u16 len; +}; + +/* + * Unique file identifier info record. + * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID, + * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME. + * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated + * name immediately after the file handle. + */ +struct fanotify_event_info_fid { + struct fanotify_event_info_header hdr; + __kernel_fsid_t fsid; + /* + * Following is an opaque struct file_handle that can be passed as + * an argument to open_by_handle_at(2). + */ + unsigned char handle[]; +}; + +/* + * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD. + * It holds a pidfd for the pid that was responsible for generating an event. + */ +struct fanotify_event_info_pidfd { + struct fanotify_event_info_header hdr; + __s32 pidfd; +}; + +struct fanotify_event_info_error { + struct fanotify_event_info_header hdr; + __s32 error; + __u32 error_count; +}; + +struct fanotify_event_info_range { + struct fanotify_event_info_header hdr; + __u32 pad; + __u64 offset; + __u64 count; +}; + +struct fanotify_event_info_mnt { + struct fanotify_event_info_header hdr; + __u64 mnt_id; +}; + +/* + * User space may need to record additional information about its decision. + * The extra information type records what kind of information is included. + * The default is none. We also define an extra information buffer whose + * size is determined by the extra information type. + * + * If the information type is Audit Rule, then the information following + * is the rule number that triggered the user space decision that + * requires auditing. + */ + +#define FAN_RESPONSE_INFO_NONE 0 +#define FAN_RESPONSE_INFO_AUDIT_RULE 1 + +struct fanotify_response { + __s32 fd; + __u32 response; +}; + +struct fanotify_response_info_header { + __u8 type; + __u8 pad; + __u16 len; +}; + +struct fanotify_response_info_audit_rule { + struct fanotify_response_info_header hdr; + __u32 rule_number; + __u32 subj_trust; + __u32 obj_trust; +}; + +/* Legit userspace responses to a _PERM event */ +#define FAN_ALLOW 0x01 +#define FAN_DENY 0x02 +/* errno other than EPERM can specified in upper byte of deny response */ +#define FAN_ERRNO_BITS 8 +#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS) +#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1) +#define FAN_DENY_ERRNO(err) \ + (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT)) + +#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */ +#define FAN_INFO 0x20 /* Bitmask to indicate additional information */ + +/* No fd set in event */ +#define FAN_NOFD -1 +#define FAN_NOPIDFD FAN_NOFD +#define FAN_EPIDFD -2 + +/* Helper functions to deal with fanotify_event_metadata buffers */ +#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) + +#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \ + (struct fanotify_event_metadata*)(((char *)(meta)) + \ + (meta)->event_len)) + +#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len <= (long)(len)) + +#endif /* _UAPI_LINUX_FANOTIFY_H */ diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 8a27bc5c7a7f..24ddf7bc4f25 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -40,6 +40,15 @@ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) +/* flags for integrity meta */ +#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */ +#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */ +#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */ + +#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \ + IO_INTEGRITY_CHK_REFTAG | \ + IO_INTEGRITY_CHK_APPTAG) + #define SEEK_SET 0 /* seek relative to beginning of file */ #define SEEK_CUR 1 /* seek relative to current file position */ #define SEEK_END 2 /* seek relative to end of file */ @@ -329,9 +338,16 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO negation of O_APPEND */ #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + +/* buffered IO that drops the cache after reading or writing data */ +#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ + RWF_DONTCACHE) #define PROCFS_IOCTL_MAGIC 'f' @@ -347,6 +363,7 @@ typedef int __bitwise __kernel_rwf_t; #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) #define PAGE_IS_SOFT_DIRTY (1 << 7) +#define PAGE_IS_GUARD (1 << 8) /* * struct page_region - Page region with flags diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7a8f4c290187..3aff99f2696a 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -119,7 +119,7 @@ struct fscrypt_key_specifier { */ struct fscrypt_provisioning_key_payload { __u32 type; - __u32 __reserved; + __u32 flags; __u8 raw[]; }; @@ -128,7 +128,9 @@ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; __u32 key_id; - __u32 __reserved[8]; +#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001 + __u32 flags; + __u32 __reserved[7]; __u8 raw[]; }; diff --git a/tools/include/uapi/linux/genetlink.h b/tools/include/uapi/linux/genetlink.h new file mode 100644 index 000000000000..ddba3ca01e39 --- /dev/null +++ b/tools/include/uapi/linux/genetlink.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_GENERIC_NETLINK_H +#define _UAPI__LINUX_GENERIC_NETLINK_H + +#include <linux/types.h> +#include <linux/netlink.h> + +#define GENL_NAMSIZ 16 /* length of family name */ + +#define GENL_MIN_ID NLMSG_MIN_TYPE +#define GENL_MAX_ID 1023 + +struct genlmsghdr { + __u8 cmd; + __u8 version; + __u16 reserved; +}; + +#define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) + +#define GENL_ADMIN_PERM 0x01 +#define GENL_CMD_CAP_DO 0x02 +#define GENL_CMD_CAP_DUMP 0x04 +#define GENL_CMD_CAP_HASPOL 0x08 +#define GENL_UNS_ADMIN_PERM 0x10 + +/* + * List of reserved static generic netlink identifiers: + */ +#define GENL_ID_CTRL NLMSG_MIN_TYPE +#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) +#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) +/* must be last reserved + 1 */ +#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3) + +/************************************************************************** + * Controller + **************************************************************************/ + +enum { + CTRL_CMD_UNSPEC, + CTRL_CMD_NEWFAMILY, + CTRL_CMD_DELFAMILY, + CTRL_CMD_GETFAMILY, + CTRL_CMD_NEWOPS, + CTRL_CMD_DELOPS, + CTRL_CMD_GETOPS, + CTRL_CMD_NEWMCAST_GRP, + CTRL_CMD_DELMCAST_GRP, + CTRL_CMD_GETMCAST_GRP, /* unused */ + CTRL_CMD_GETPOLICY, + __CTRL_CMD_MAX, +}; + +#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1) + +enum { + CTRL_ATTR_UNSPEC, + CTRL_ATTR_FAMILY_ID, + CTRL_ATTR_FAMILY_NAME, + CTRL_ATTR_VERSION, + CTRL_ATTR_HDRSIZE, + CTRL_ATTR_MAXATTR, + CTRL_ATTR_OPS, + CTRL_ATTR_MCAST_GROUPS, + CTRL_ATTR_POLICY, + CTRL_ATTR_OP_POLICY, + CTRL_ATTR_OP, + __CTRL_ATTR_MAX, +}; + +#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) + +enum { + CTRL_ATTR_OP_UNSPEC, + CTRL_ATTR_OP_ID, + CTRL_ATTR_OP_FLAGS, + __CTRL_ATTR_OP_MAX, +}; + +#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) + +enum { + CTRL_ATTR_MCAST_GRP_UNSPEC, + CTRL_ATTR_MCAST_GRP_NAME, + CTRL_ATTR_MCAST_GRP_ID, + __CTRL_ATTR_MCAST_GRP_MAX, +}; + +#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) + +enum { + CTRL_ATTR_POLICY_UNSPEC, + CTRL_ATTR_POLICY_DO, + CTRL_ATTR_POLICY_DUMP, + + __CTRL_ATTR_POLICY_DUMP_MAX, + CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1 +}; + +#define CTRL_ATTR_POLICY_MAX (__CTRL_ATTR_POLICY_DUMP_MAX - 1) + +#endif /* _UAPI__LINUX_GENERIC_NETLINK_H */ diff --git a/tools/include/uapi/linux/if_addr.h b/tools/include/uapi/linux/if_addr.h new file mode 100644 index 000000000000..aa7958b4e41d --- /dev/null +++ b/tools/include/uapi/linux/if_addr.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_IF_ADDR_H +#define _UAPI__LINUX_IF_ADDR_H + +#include <linux/types.h> +#include <linux/netlink.h> + +struct ifaddrmsg { + __u8 ifa_family; + __u8 ifa_prefixlen; /* The prefix length */ + __u8 ifa_flags; /* Flags */ + __u8 ifa_scope; /* Address scope */ + __u32 ifa_index; /* Link index */ +}; + +/* + * Important comment: + * IFA_ADDRESS is prefix address, rather than local interface address. + * It makes no difference for normally configured broadcast interfaces, + * but for point-to-point IFA_ADDRESS is DESTINATION address, + * local address is supplied in IFA_LOCAL attribute. + * + * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags. + * If present, the value from struct ifaddrmsg will be ignored. + */ +enum { + IFA_UNSPEC, + IFA_ADDRESS, + IFA_LOCAL, + IFA_LABEL, + IFA_BROADCAST, + IFA_ANYCAST, + IFA_CACHEINFO, + IFA_MULTICAST, + IFA_FLAGS, + IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ + IFA_TARGET_NETNSID, + IFA_PROTO, /* u8, address protocol */ + __IFA_MAX, +}; + +#define IFA_MAX (__IFA_MAX - 1) + +/* ifa_flags */ +#define IFA_F_SECONDARY 0x01 +#define IFA_F_TEMPORARY IFA_F_SECONDARY + +#define IFA_F_NODAD 0x02 +#define IFA_F_OPTIMISTIC 0x04 +#define IFA_F_DADFAILED 0x08 +#define IFA_F_HOMEADDRESS 0x10 +#define IFA_F_DEPRECATED 0x20 +#define IFA_F_TENTATIVE 0x40 +#define IFA_F_PERMANENT 0x80 +#define IFA_F_MANAGETEMPADDR 0x100 +#define IFA_F_NOPREFIXROUTE 0x200 +#define IFA_F_MCAUTOJOIN 0x400 +#define IFA_F_STABLE_PRIVACY 0x800 + +struct ifa_cacheinfo { + __u32 ifa_prefered; + __u32 ifa_valid; + __u32 cstamp; /* created timestamp, hundredths of seconds */ + __u32 tstamp; /* updated timestamp, hundredths of seconds */ +}; + +/* backwards compatibility for userspace */ +#ifndef __KERNEL__ +#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg)))) +#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) +#endif + +/* ifa_proto */ +#define IFAPROT_UNSPEC 0 +#define IFAPROT_KERNEL_LO 1 /* loopback */ +#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */ +#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */ + +#endif diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 7e46ca4cd31b..757ce5e9426e 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -1526,6 +1526,8 @@ enum { IFLA_BOND_MISSED_MAX, IFLA_BOND_NS_IP6_TARGET, IFLA_BOND_COUPLED_CONTROL, + IFLA_BOND_BROADCAST_NEIGH, + IFLA_BOND_LACP_STRICT, __IFLA_BOND_MAX, }; diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 42ec5ddaab8d..23a062781468 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -7,8 +7,8 @@ * Magnus Karlsson <magnus.karlsson@intel.com> */ -#ifndef _LINUX_IF_XDP_H -#define _LINUX_IF_XDP_H +#ifndef _UAPI_LINUX_IF_XDP_H +#define _UAPI_LINUX_IF_XDP_H #include <linux/types.h> @@ -79,6 +79,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 #define XDP_OPTIONS 8 +#define XDP_MAX_TX_SKB_BUDGET 9 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ @@ -127,6 +128,12 @@ struct xdp_options { */ #define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) +/* Request launch time hardware offload. The device will schedule the packet for + * transmission at a pre-determined time called launch time. The value of + * launch time is communicated via launch_time field of struct xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_LAUNCH_TIME (1 << 2) + /* AF_XDP offloads request. 'request' union member is consumed by the driver * when the packet is being transmitted. 'completion' union member is * filled by the driver when the transmit completion arrives. @@ -142,6 +149,10 @@ struct xsk_tx_metadata { __u16 csum_start; /* Offset from csum_start where checksum should be stored. */ __u16 csum_offset; + + /* XDP_TXMD_FLAGS_LAUNCH_TIME */ + /* Launch time in nanosecond against the PTP HW Clock */ + __u64 launch_time; } request; struct { @@ -170,4 +181,4 @@ struct xdp_desc { /* TX packet carries valid metadata. */ #define XDP_TX_METADATA (1 << 1) -#endif /* _LINUX_IF_XDP_H */ +#endif /* _UAPI_LINUX_IF_XDP_H */ diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 5d32d53508d9..ced0fc3c3aa5 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 502ea63b5d2e..d0c0c8605976 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -14,6 +14,10 @@ #include <linux/ioctl.h> #include <asm/kvm.h> +#ifdef __KERNEL__ +#include <linux/kvm_types.h> +#endif + #define KVM_API_VERSION 12 /* @@ -135,6 +139,12 @@ struct kvm_xen_exit { } u; }; +struct kvm_exit_snp_req_certs { + __u64 gpa; + __u64 npages; + __u64 ret; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -178,6 +188,10 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 +#define KVM_EXIT_ARM_SEA 41 +#define KVM_EXIT_ARM_LDST64B 42 +#define KVM_EXIT_SNP_REQ_CERTS 43 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -375,6 +389,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_WAKEUP 4 #define KVM_SYSTEM_EVENT_SUSPEND 5 #define KVM_SYSTEM_EVENT_SEV_TERM 6 +#define KVM_SYSTEM_EVENT_TDX_FATAL 7 __u32 type; __u32 ndata; union { @@ -399,7 +414,7 @@ struct kvm_run { } eoi; /* KVM_EXIT_HYPERV */ struct kvm_hyperv_exit hyperv; - /* KVM_EXIT_ARM_NISV */ + /* KVM_EXIT_ARM_NISV / KVM_EXIT_ARM_LDST64B */ struct { __u64 esr_iss; __u64 fault_ipa; @@ -446,6 +461,41 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; + }; + } tdx; + /* KVM_EXIT_ARM_SEA */ + struct { +#define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID (1ULL << 0) + __u64 flags; + __u64 esr; + __u64 gva; + __u64 gpa; + } arm_sea; + /* KVM_EXIT_SNP_REQ_CERTS */ + struct kvm_exit_snp_req_certs snp_req_certs; /* Fix the size of the union. */ char padding[256]; }; @@ -617,10 +667,7 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) +#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { @@ -933,6 +980,15 @@ struct kvm_enable_cap { #define KVM_CAP_PRE_FAULT_MEMORY 236 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_ARM_EL2 240 +#define KVM_CAP_ARM_EL2_E2H0 241 +#define KVM_CAP_RISCV_MP_STATE_RESET 242 +#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 +#define KVM_CAP_GUEST_MEMFD_FLAGS 244 +#define KVM_CAP_ARM_SEA_TO_USER 245 +#define KVM_CAP_S390_USER_OPEREXEC 246 +#define KVM_CAP_S390_KEYOP 247 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1070,6 +1126,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL @@ -1164,6 +1224,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_PCHPIC, #define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC + KVM_DEV_TYPE_ARM_VGIC_V5, +#define KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_MAX, @@ -1174,6 +1236,16 @@ struct kvm_vfio_spapr_tce { __s32 tablefd; }; +#define KVM_S390_KEYOP_ISKE 0x01 +#define KVM_S390_KEYOP_RRBE 0x02 +#define KVM_S390_KEYOP_SSKE 0x03 +struct kvm_s390_keyop { + __u64 guest_addr; + __u8 key; + __u8 operation; + __u8 pad[6]; +}; + /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. @@ -1193,6 +1265,7 @@ struct kvm_vfio_spapr_tce { #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) +#define KVM_S390_KEYOP _IOWR(KVMIO, 0x53, struct kvm_s390_keyop) /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) @@ -1534,7 +1607,11 @@ struct kvm_stats_desc { __u16 size; __u32 offset; __u32 bucket_size; +#ifdef __KERNEL__ + char name[KVM_STATS_NAME_SIZE]; +#else char name[]; +#endif }; #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) @@ -1565,6 +1642,8 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) +#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1) struct kvm_create_guest_memfd { __u64 size; diff --git a/tools/include/uapi/linux/module_signature.h b/tools/include/uapi/linux/module_signature.h new file mode 100644 index 000000000000..634c9f1c8fc2 --- /dev/null +++ b/tools/include/uapi/linux/module_signature.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Module signature handling. + * + * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _UAPI_LINUX_MODULE_SIGNATURE_H +#define _UAPI_LINUX_MODULE_SIGNATURE_H + +#include <linux/types.h> + +/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */ +#define MODULE_SIGNATURE_MARKER "~Module signature appended~\n" + +enum module_signature_type { + MODULE_SIGNATURE_TYPE_PKCS7 = 2, /* Signature in PKCS#7 message */ +}; + +/* + * Module signature information block. + * + * The constituents of the signature section are, in order: + * + * - Signer's name + * - Key identifier + * - Signature data + * - Information block + */ +struct module_signature { + __u8 algo; /* Public-key crypto algorithm [0] */ + __u8 hash; /* Digest algorithm [0] */ + __u8 id_type; /* Key identifier type [enum module_signature_type] */ + __u8 signer_len; /* Length of signer's name [0] */ + __u8 key_id_len; /* Length of key identifier [0] */ + __u8 __pad[3]; + __be32 sig_len; /* Length of signature data */ +}; + +#endif /* _UAPI_LINUX_MODULE_SIGNATURE_H */ diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h new file mode 100644 index 000000000000..2204708dbf7a --- /dev/null +++ b/tools/include/uapi/linux/mount.h @@ -0,0 +1,245 @@ +#ifndef _UAPI_LINUX_MOUNT_H +#define _UAPI_LINUX_MOUNT_H + +#include <linux/types.h> + +/* + * These are the fs-independent mount-flags: up to 32 flags are supported + * + * Usage of these is restricted within the kernel to core mount(2) code and + * callers of sys_mount() only. Filesystems should be using the SB_* + * equivalent instead. + */ +#define MS_RDONLY 1 /* Mount read-only */ +#define MS_NOSUID 2 /* Ignore suid and sgid bits */ +#define MS_NODEV 4 /* Disallow access to device special files */ +#define MS_NOEXEC 8 /* Disallow program execution */ +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ +#define MS_NOATIME 1024 /* Do not update access times. */ +#define MS_NODIRATIME 2048 /* Do not update directory access times */ +#define MS_BIND 4096 +#define MS_MOVE 8192 +#define MS_REC 16384 +#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. + MS_VERBOSE is deprecated. */ +#define MS_SILENT 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#define MS_PRIVATE (1<<18) /* change to private */ +#define MS_SLAVE (1<<19) /* change to slave */ +#define MS_SHARED (1<<20) /* change to shared */ +#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ +#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ + +/* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) +#define MS_NOREMOTELOCK (1<<27) +#define MS_NOSEC (1<<28) +#define MS_BORN (1<<29) +#define MS_ACTIVE (1<<30) +#define MS_NOUSER (1<<31) + +/* + * Superblock flags that can be altered by MS_REMOUNT + */ +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ + MS_LAZYTIME) + +/* + * Old magic mount flag and mask + */ +#define MS_MGC_VAL 0xC0ED0000 +#define MS_MGC_MSK 0xffff0000 + +/* + * open_tree() flags. + */ +#define OPEN_TREE_CLONE (1 << 0) /* Clone the target tree and attach the clone */ +#define OPEN_TREE_NAMESPACE (1 << 1) /* Clone the target tree into a new mount namespace */ +#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ + +/* + * move_mount() flags. + */ +#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ +#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ +#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */ +#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */ +#define MOVE_MOUNT__MASK 0x00000377 + +/* + * fsopen() flags. + */ +#define FSOPEN_CLOEXEC 0x00000001 + +/* + * fspick() flags. + */ +#define FSPICK_CLOEXEC 0x00000001 +#define FSPICK_SYMLINK_NOFOLLOW 0x00000002 +#define FSPICK_NO_AUTOMOUNT 0x00000004 +#define FSPICK_EMPTY_PATH 0x00000008 + +/* + * The type of fsconfig() call made. + */ +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ + FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */ +}; + +/* + * fsmount() flags. + */ +#define FSMOUNT_CLOEXEC 0x00000001 +#define FSMOUNT_NAMESPACE 0x00000002 /* Create the mount in a new mount namespace */ + +/* + * Mount attributes. + */ +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ + +/* + * mount_setattr() + */ +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; + +/* List of all mount_attr versions. */ +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ + + +/* + * Structure for getting mount/superblock/filesystem info with statmount(2). + * + * The interface is similar to statx(2): individual fields or groups can be + * selected with the @mask argument of statmount(). Kernel will set the @mask + * field according to the supported fields. + * + * If string fields are selected, then the caller needs to pass a buffer that + * has space after the fixed part of the structure. Nul terminated strings are + * copied there and offsets relative to @str are stored in the relevant fields. + * If the buffer is too small, then EOVERFLOW is returned. The actually used + * size is returned in @size. + */ +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... */ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u64 supported_mask; /* Mask flags that this kernel supports */ + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */ + __u64 __spare2[43]; + char str[]; /* Variable size part containing strings */ +}; + +/* + * Structure for passing mount ID and miscellaneous parameters to statmount(2) + * and listmount(2). + * + * For statmount(2) @param represents the request mask. + * For listmount(2) @param represents the last listed mount id (or zero). + */ +struct mnt_id_req { + __u32 size; + union { + __u32 mnt_ns_fd; + __u32 mnt_fd; + }; + __u64 mnt_id; + __u64 param; + __u64 mnt_ns_id; +}; + +/* List of all mnt_id_req versions. */ +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ + +/* + * @mask bits for statmount(2) + */ +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ +#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */ +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ + +/* + * Special @mnt_id values that can be passed to listmount + */ +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ + +/* + * @flag bits for statmount(2) + */ +#define STATMOUNT_BY_FD 0x00000001U /* want mountinfo for given fd */ + +#endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/include/uapi/linux/neighbour.h b/tools/include/uapi/linux/neighbour.h new file mode 100644 index 000000000000..c34a81245f87 --- /dev/null +++ b/tools/include/uapi/linux/neighbour.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NEIGHBOUR_H +#define _UAPI__LINUX_NEIGHBOUR_H + +#include <linux/types.h> +#include <linux/netlink.h> + +struct ndmsg { + __u8 ndm_family; + __u8 ndm_pad1; + __u16 ndm_pad2; + __s32 ndm_ifindex; + __u16 ndm_state; + __u8 ndm_flags; + __u8 ndm_type; +}; + +enum { + NDA_UNSPEC, + NDA_DST, + NDA_LLADDR, + NDA_CACHEINFO, + NDA_PROBES, + NDA_VLAN, + NDA_PORT, + NDA_VNI, + NDA_IFINDEX, + NDA_MASTER, + NDA_LINK_NETNSID, + NDA_SRC_VNI, + NDA_PROTOCOL, /* Originator of entry */ + NDA_NH_ID, + NDA_FDB_EXT_ATTRS, + NDA_FLAGS_EXT, + NDA_NDM_STATE_MASK, + NDA_NDM_FLAGS_MASK, + __NDA_MAX +}; + +#define NDA_MAX (__NDA_MAX - 1) + +/* + * Neighbor Cache Entry Flags + */ + +#define NTF_USE (1 << 0) +#define NTF_SELF (1 << 1) +#define NTF_MASTER (1 << 2) +#define NTF_PROXY (1 << 3) /* == ATF_PUBL */ +#define NTF_EXT_LEARNED (1 << 4) +#define NTF_OFFLOADED (1 << 5) +#define NTF_STICKY (1 << 6) +#define NTF_ROUTER (1 << 7) +/* Extended flags under NDA_FLAGS_EXT: */ +#define NTF_EXT_MANAGED (1 << 0) +#define NTF_EXT_LOCKED (1 << 1) +#define NTF_EXT_EXT_VALIDATED (1 << 2) + +/* + * Neighbor Cache Entry States. + */ + +#define NUD_INCOMPLETE 0x01 +#define NUD_REACHABLE 0x02 +#define NUD_STALE 0x04 +#define NUD_DELAY 0x08 +#define NUD_PROBE 0x10 +#define NUD_FAILED 0x20 + +/* Dummy states */ +#define NUD_NOARP 0x40 +#define NUD_PERMANENT 0x80 +#define NUD_NONE 0x00 + +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no + * address resolution or NUD. + * + * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true + * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier + * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED + * flagged entries explicitly are (which is also consistent with the routing + * subsystem). + * + * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry + * states don't make sense and thus are ignored. Such entries don't age and + * can roam. + * + * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf + * of a user space control plane, and automatically refreshed so that (if + * possible) they remain in NUD_REACHABLE state. + * + * NTF_EXT_LOCKED flagged bridge FDB entries are entries generated by the + * bridge in response to a host trying to communicate via a locked bridge port + * with MAB enabled. Their purpose is to notify user space that a host requires + * authentication. + * + * NTF_EXT_EXT_VALIDATED flagged neighbor entries were externally validated by + * a user space control plane. The kernel will not remove or invalidate them, + * but it can probe them and notify user space when they become reachable. + */ + +struct nda_cacheinfo { + __u32 ndm_confirmed; + __u32 ndm_used; + __u32 ndm_updated; + __u32 ndm_refcnt; +}; + +/***************************************************************** + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. + ****/ + +struct ndt_stats { + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; + __u64 ndts_table_fulls; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN, /* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY, /* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + NDTPA_QUEUE_LENBYTES, /* u32 */ + NDTPA_MCAST_REPROBES, /* u32 */ + NDTPA_PAD, + NDTPA_INTERVAL_PROBE_TIME_MS, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg { + __u8 ndtm_family; + __u8 ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config { + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush; /* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG, /* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + NDTA_PAD, + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + + /* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY: + * - FDB_NOTIFY_BIT - notify on activity/expire for any entry + * - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications + */ +enum { + FDB_NOTIFY_BIT = (1 << 0), + FDB_NOTIFY_INACTIVE_BIT = (1 << 1) +}; + +/* embedded into NDA_FDB_EXT_ATTRS: + * [NDA_FDB_EXT_ATTRS] = { + * [NFEA_ACTIVITY_NOTIFY] + * ... + * } + */ +enum { + NFEA_UNSPEC, + NFEA_ACTIVITY_NOTIFY, + NFEA_DONT_REFRESH, + __NFEA_MAX +}; +#define NFEA_MAX (__NFEA_MAX - 1) + +#endif diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index e4be227d3ad6..2f3ab75e8cc0 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_NETDEV_H #define _UAPI_LINUX_NETDEV_H @@ -59,10 +60,13 @@ enum netdev_xdp_rx_metadata { * by the driver. * @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the * driver. + * @NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO: Launch time HW offload is supported + * by the driver. */ enum netdev_xsk_flags { NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1, NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, + NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO = 4, }; enum netdev_queue_type { @@ -74,6 +78,12 @@ enum netdev_qstats_scope { NETDEV_QSTATS_SCOPE_QUEUE = 1, }; +enum netdev_napi_threaded { + NETDEV_NAPI_THREADED_DISABLED, + NETDEV_NAPI_THREADED_ENABLED, + NETDEV_NAPI_THREADED_BUSY_POLL, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -87,6 +97,13 @@ enum { }; enum { + NETDEV_A_IO_URING_PROVIDER_INFO_RX_BUF_LEN = 1, + + __NETDEV_A_IO_URING_PROVIDER_INFO_MAX, + NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1) +}; + +enum { NETDEV_A_PAGE_POOL_ID = 1, NETDEV_A_PAGE_POOL_IFINDEX, NETDEV_A_PAGE_POOL_NAPI_ID, @@ -94,6 +111,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, NETDEV_A_PAGE_POOL_DMABUF, + NETDEV_A_PAGE_POOL_IO_URING, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -125,17 +143,26 @@ enum { NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + NETDEV_A_NAPI_THREADED, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) }; enum { + __NETDEV_A_XSK_INFO_MAX, + NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) +}; + +enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, + NETDEV_A_QUEUE_IO_URING, + NETDEV_A_QUEUE_XSK, + NETDEV_A_QUEUE_LEASE, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -179,6 +206,15 @@ enum { }; enum { + NETDEV_A_LEASE_IFINDEX = 1, + NETDEV_A_LEASE_QUEUE, + NETDEV_A_LEASE_NETNS_ID, + + __NETDEV_A_LEASE_MAX, + NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1) +}; + +enum { NETDEV_A_DMABUF_IFINDEX = 1, NETDEV_A_DMABUF_QUEUES, NETDEV_A_DMABUF_FD, @@ -203,6 +239,8 @@ enum { NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, + NETDEV_CMD_BIND_TX, + NETDEV_CMD_QUEUE_CREATE, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/include/uapi/linux/netfilter.h b/tools/include/uapi/linux/netfilter.h new file mode 100644 index 000000000000..5a79ccb76701 --- /dev/null +++ b/tools/include/uapi/linux/netfilter.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NETFILTER_H +#define _UAPI__LINUX_NETFILTER_H + +#include <linux/types.h> +#include <linux/compiler.h> +#include <linux/in.h> +#include <linux/in6.h> + +/* Responses from hook functions. */ +#define NF_DROP 0 +#define NF_ACCEPT 1 +#define NF_STOLEN 2 +#define NF_QUEUE 3 +#define NF_REPEAT 4 +#define NF_STOP 5 /* Deprecated, for userspace nf_queue compatibility. */ +#define NF_MAX_VERDICT NF_STOP + +/* we overload the higher bits for encoding auxiliary data such as the queue + * number or errno values. Not nice, but better than additional function + * arguments. */ +#define NF_VERDICT_MASK 0x000000ff + +/* extra verdict flags have mask 0x0000ff00 */ +#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000 + +/* queue number (NF_QUEUE) or errno (NF_DROP) */ +#define NF_VERDICT_QMASK 0xffff0000 +#define NF_VERDICT_QBITS 16 + +#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE) + +#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP) + +/* only for userspace compatibility */ +#ifndef __KERNEL__ + +/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ +#define NF_VERDICT_BITS 16 +#endif + +enum nf_inet_hooks { + NF_INET_PRE_ROUTING, + NF_INET_LOCAL_IN, + NF_INET_FORWARD, + NF_INET_LOCAL_OUT, + NF_INET_POST_ROUTING, + NF_INET_NUMHOOKS, + NF_INET_INGRESS = NF_INET_NUMHOOKS, +}; + +enum nf_dev_hooks { + NF_NETDEV_INGRESS, + NF_NETDEV_EGRESS, + NF_NETDEV_NUMHOOKS +}; + +enum { + NFPROTO_UNSPEC = 0, + NFPROTO_INET = 1, + NFPROTO_IPV4 = 2, + NFPROTO_ARP = 3, + NFPROTO_NETDEV = 5, + NFPROTO_BRIDGE = 7, + NFPROTO_IPV6 = 10, +#ifndef __KERNEL__ /* no longer supported by kernel */ + NFPROTO_DECNET = 12, +#endif + NFPROTO_NUMPROTO, +}; + +union nf_inet_addr { + __u32 all[4]; + __be32 ip; + __be32 ip6[4]; + struct in_addr in; + struct in6_addr in6; +}; + +#endif /* _UAPI__LINUX_NETFILTER_H */ diff --git a/tools/include/uapi/linux/netfilter_arp.h b/tools/include/uapi/linux/netfilter_arp.h new file mode 100644 index 000000000000..791dfc5ae907 --- /dev/null +++ b/tools/include/uapi/linux/netfilter_arp.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */ +#ifndef __LINUX_ARP_NETFILTER_H +#define __LINUX_ARP_NETFILTER_H + +/* ARP-specific defines for netfilter. + * (C)2002 Rusty Russell IBM -- This code is GPL. + */ + +#include <linux/netfilter.h> + +/* There is no PF_ARP. */ +#define NF_ARP 0 + +/* ARP Hooks */ +#define NF_ARP_IN 0 +#define NF_ARP_OUT 1 +#define NF_ARP_FORWARD 2 + +#ifndef __KERNEL__ +#define NF_ARP_NUMHOOKS 3 +#endif + +#endif /* __LINUX_ARP_NETFILTER_H */ diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h new file mode 100644 index 000000000000..a25e38d1c874 --- /dev/null +++ b/tools/include/uapi/linux/nsfs.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_NSFS_H +#define __LINUX_NSFS_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +#define NSIO 0xb7 + +/* Returns a file descriptor that refers to an owning user namespace */ +#define NS_GET_USERNS _IO(NSIO, 0x1) +/* Returns a file descriptor that refers to a parent namespace */ +#define NS_GET_PARENT _IO(NSIO, 0x2) +/* Returns the type of namespace (CLONE_NEW* value) referred to by + file descriptor */ +#define NS_GET_NSTYPE _IO(NSIO, 0x3) +/* Get owner UID (in the caller's user namespace) for a user namespace */ +#define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Translate pid from target pid namespace into the caller's pid namespace. */ +#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) +/* Return thread-group leader id of pid in the callers pid namespace. */ +#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int) +/* Translate pid from caller's pid namespace into a target pid namespace. */ +#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int) +/* Return thread-group leader id of pid in the target pid namespace. */ +#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int) + +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ + +/* Get information about namespace. */ +#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info) +/* Get next namespace. */ +#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info) +/* Get previous namespace. */ +#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) + +/* Retrieve namespace identifiers. */ +#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64) +#define NS_GET_ID _IOR(NSIO, 13, __u64) + +enum init_ns_ino { + IPC_NS_INIT_INO = 0xEFFFFFFFU, + UTS_NS_INIT_INO = 0xEFFFFFFEU, + USER_NS_INIT_INO = 0xEFFFFFFDU, + PID_NS_INIT_INO = 0xEFFFFFFCU, + CGROUP_NS_INIT_INO = 0xEFFFFFFBU, + TIME_NS_INIT_INO = 0xEFFFFFFAU, + NET_NS_INIT_INO = 0xEFFFFFF9U, + MNT_NS_INIT_INO = 0xEFFFFFF8U, +#ifdef __KERNEL__ + MNT_NS_ANON_INO = 0xEFFFFFF7U, +#endif +}; + +struct nsfs_file_handle { + __u64 ns_id; + __u32 ns_type; + __u32 ns_inum; +}; + +#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */ +#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */ + +enum init_ns_id { + IPC_NS_INIT_ID = 1ULL, + UTS_NS_INIT_ID = 2ULL, + USER_NS_INIT_ID = 3ULL, + PID_NS_INIT_ID = 4ULL, + CGROUP_NS_INIT_ID = 5ULL, + TIME_NS_INIT_ID = 6ULL, + NET_NS_INIT_ID = 7ULL, + MNT_NS_INIT_ID = 8ULL, +#ifdef __KERNEL__ + NS_LAST_INIT_ID = MNT_NS_INIT_ID, +#endif +}; + +enum ns_type { + TIME_NS = (1ULL << 7), /* CLONE_NEWTIME */ + MNT_NS = (1ULL << 17), /* CLONE_NEWNS */ + CGROUP_NS = (1ULL << 25), /* CLONE_NEWCGROUP */ + UTS_NS = (1ULL << 26), /* CLONE_NEWUTS */ + IPC_NS = (1ULL << 27), /* CLONE_NEWIPC */ + USER_NS = (1ULL << 28), /* CLONE_NEWUSER */ + PID_NS = (1ULL << 29), /* CLONE_NEWPID */ + NET_NS = (1ULL << 30), /* CLONE_NEWNET */ +}; + +/** + * struct ns_id_req - namespace ID request structure + * @size: size of this structure + * @spare: reserved for future use + * @filter: filter mask + * @ns_id: last namespace id + * @user_ns_id: owning user namespace ID + * + * Structure for passing namespace ID and miscellaneous parameters to + * statns(2) and listns(2). + * + * For statns(2) @param represents the request mask. + * For listns(2) @param represents the last listed mount id (or zero). + */ +struct ns_id_req { + __u32 size; + __u32 spare; + __u64 ns_id; + struct /* listns */ { + __u32 ns_type; + __u32 spare2; + __u64 user_ns_id; + }; +}; + +/* + * Special @user_ns_id value that can be passed to listns() + */ +#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */ + +/* List of all ns_id_req versions. */ +#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */ + +#endif /* __LINUX_NSFS_H */ diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h new file mode 100644 index 000000000000..4759c471676c --- /dev/null +++ b/tools/include/uapi/linux/openat2.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_OPENAT2_H +#define _LINUX_OPENAT2_H + +#include <linux/types.h> + +/* + * Arguments for how openat2(2) should open the target path. If only @flags and + * @mode are non-zero, then openat2(2) operates very similarly to openat(2). + * + * However, unlike openat(2), unknown or invalid bits in @flags result in + * -EINVAL rather than being silently ignored. @mode must be zero unless one of + * {O_CREAT, O_TMPFILE} are set. + * + * @flags: O_* flags. + * @mode: O_CREAT/O_TMPFILE file mode. + * @resolve: RESOLVE_* flags. + */ +struct open_how { + __u64 flags; + __u64 mode; + __u64 resolve; +}; + +/* how->resolve flags for openat2(2). */ +#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings + (includes bind-mounts). */ +#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style + "magic-links". */ +#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks + (implies OEXT_NO_MAGICLINKS) */ +#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like + "..", symlinks, and absolute + paths which escape the dirfd. */ +#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." + be scoped inside the dirfd + (similar to chroot(2)). */ +#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be + completed through cached lookup. May + return -EAGAIN if that's not + possible. */ + +#endif /* _LINUX_OPENAT2_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 0524d541d4e3..fd10aa8d697f 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -2,7 +2,7 @@ /* * Performance events: * - * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> + * Copyright (C) 2008-2009, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org> * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra * @@ -39,18 +39,21 @@ enum perf_type_id { /* * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA * AA: hardware event ID * EEEEEEEE: PMU type ID + * * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB * BB: hardware cache ID * CC: hardware cache op ID * DD: hardware cache op result ID * EEEEEEEE: PMU type ID - * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + * + * If the PMU type ID is 0, PERF_TYPE_RAW will be applied. */ -#define PERF_PMU_TYPE_SHIFT 32 -#define PERF_HW_EVENT_MASK 0xffffffff +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff /* * Generalized performance event event_id types, used by the @@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id { /* * Special "software" events provided by the kernel, even if the hardware * does not support performance events. These events measure various - * physical and sw events of the kernel (and allow the profiling of them as + * physical and SW events of the kernel (and allow the profiling of them as * well): */ enum perf_sw_ids { @@ -167,8 +170,9 @@ enum perf_event_sample_format { }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) + /* - * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set. * * If the user does not pass priv level information via branch_sample_type, * the kernel uses the event's priv level. Branch and event priv levels do @@ -178,20 +182,20 @@ enum perf_event_sample_format { * of branches and therefore it supersedes all the other types. */ enum perf_branch_sample_type_shift { - PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ @@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift { }; enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, - PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, - PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, - PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, - PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, - PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, - PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, - PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, - PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, - PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, - PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, - PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, - PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, + PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, - PERF_SAMPLE_BRANCH_TYPE_SAVE = - 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, - PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, - PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; /* - * Common flow change classification + * Common control flow change classifications: */ enum { - PERF_BR_UNKNOWN = 0, /* unknown */ - PERF_BR_COND = 1, /* conditional */ - PERF_BR_UNCOND = 2, /* unconditional */ - PERF_BR_IND = 3, /* indirect */ - PERF_BR_CALL = 4, /* function call */ - PERF_BR_IND_CALL = 5, /* indirect function call */ - PERF_BR_RET = 6, /* function return */ - PERF_BR_SYSCALL = 7, /* syscall */ - PERF_BR_SYSRET = 8, /* syscall return */ - PERF_BR_COND_CALL = 9, /* conditional function call */ - PERF_BR_COND_RET = 10, /* conditional function return */ - PERF_BR_ERET = 11, /* exception return */ - PERF_BR_IRQ = 12, /* irq */ - PERF_BR_SERROR = 13, /* system error */ - PERF_BR_NO_TX = 14, /* not in transaction */ - PERF_BR_EXTEND_ABI = 15, /* extend ABI */ + PERF_BR_UNKNOWN = 0, /* Unknown */ + PERF_BR_COND = 1, /* Conditional */ + PERF_BR_UNCOND = 2, /* Unconditional */ + PERF_BR_IND = 3, /* Indirect */ + PERF_BR_CALL = 4, /* Function call */ + PERF_BR_IND_CALL = 5, /* Indirect function call */ + PERF_BR_RET = 6, /* Function return */ + PERF_BR_SYSCALL = 7, /* Syscall */ + PERF_BR_SYSRET = 8, /* Syscall return */ + PERF_BR_COND_CALL = 9, /* Conditional function call */ + PERF_BR_COND_RET = 10, /* Conditional function return */ + PERF_BR_ERET = 11, /* Exception return */ + PERF_BR_IRQ = 12, /* IRQ */ + PERF_BR_SERROR = 13, /* System error */ + PERF_BR_NO_TX = 14, /* Not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* Extend ABI */ PERF_BR_MAX, }; /* - * Common branch speculation outcome classification + * Common branch speculation outcome classifications: */ enum { - PERF_BR_SPEC_NA = 0, /* Not available */ - PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ - PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ - PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ PERF_BR_SPEC_MAX, }; enum { - PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ - PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ - PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ - PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ - PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ - PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ - PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ - PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ PERF_BR_NEW_MAX, }; enum { - PERF_BR_PRIV_UNKNOWN = 0, - PERF_BR_PRIV_USER = 1, - PERF_BR_PRIV_KERNEL = 2, - PERF_BR_PRIV_HV = 3, + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, }; -#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 -#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 -#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 -#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 -#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ @@ -310,9 +313,9 @@ enum { * Values to determine ABI of the registers dump. */ enum perf_sample_regs_abi { - PERF_SAMPLE_REGS_ABI_NONE = 0, - PERF_SAMPLE_REGS_ABI_32 = 1, - PERF_SAMPLE_REGS_ABI_64 = 2, + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, }; /* @@ -320,21 +323,21 @@ enum perf_sample_regs_abi { * abort events. Multiple bits can be set. */ enum { - PERF_TXN_ELISION = (1 << 0), /* From elision */ - PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ - PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ - PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ - PERF_TXN_RETRY = (1 << 4), /* Retry possible */ - PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ - PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ - PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ - PERF_TXN_MAX = (1 << 8), /* non-ABI */ + PERF_TXN_MAX = (1 << 8), /* non-ABI */ - /* bits 32..63 are reserved for the abort code */ + /* Bits 32..63 are reserved for the abort code */ - PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), - PERF_TXN_ABORT_SHIFT = 32, + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, }; /* @@ -369,22 +372,23 @@ enum perf_event_read_format { PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ -#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ -#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ -#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ - /* add: sample_stack_user */ -#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ -#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ -#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ -#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ -#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */ +#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */ +#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */ + /* Add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */ +#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */ +#define PERF_ATTR_SIZE_VER9 144 /* add: config4 */ /* - * Hardware event_id to monitor via a performance monitoring event: - * - * @sample_max_stack: Max number of frame pointers in a callchain, - * should be < /proc/sys/kernel/perf_event_max_stack + * 'struct perf_event_attr' contains various attributes that define + * a performance event - most of them hardware related configuration + * details, but also a lot of behavioral switches and values implemented + * by the kernel. */ struct perf_event_attr { @@ -394,7 +398,7 @@ struct perf_event_attr { __u32 type; /* - * Size of the attr structure, for fwd/bwd compat. + * Size of the attr structure, for forward/backwards compatibility. */ __u32 size; @@ -449,21 +453,23 @@ struct perf_event_attr { comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ - write_backward : 1, /* Write ring buffer from end to beginning */ + write_backward : 1, /* write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - bpf_event : 1, /* include bpf events */ + bpf_event : 1, /* include BPF events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - build_id : 1, /* use build id in mmap2 events */ + build_id : 1, /* use build ID in mmap2 events */ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ - __reserved_1 : 26; + defer_callchain: 1, /* request PERF_RECORD_CALLCHAIN_DEFERRED records */ + defer_output : 1, /* output PERF_RECORD_CALLCHAIN_DEFERRED records */ + __reserved_1 : 24; union { - __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_events; /* wake up every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; @@ -472,13 +478,13 @@ struct perf_event_attr { __u64 bp_addr; __u64 kprobe_func; /* for perf_kprobe */ __u64 uprobe_path; /* for perf_uprobe */ - __u64 config1; /* extension of config */ + __u64 config1; /* extension of config */ }; union { __u64 bp_len; - __u64 kprobe_addr; /* when kprobe_func == NULL */ + __u64 kprobe_addr; /* when kprobe_func == NULL */ __u64 probe_offset; /* for perf_[k,u]probe */ - __u64 config2; /* extension of config1 */ + __u64 config2; /* extension of config1 */ }; __u64 branch_sample_type; /* enum perf_branch_sample_type */ @@ -508,7 +514,16 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; + + /* + * Max number of frame pointers in a callchain, should be + * lower than /proc/sys/kernel/perf_event_max_stack. + * + * Max number of entries of branch stack should be lower + * than the hardware limit. + */ __u16 sample_max_stack; + __u16 __reserved_2; __u32 aux_sample_size; @@ -531,11 +546,12 @@ struct perf_event_attr { __u64 sig_data; __u64 config3; /* extension of config2 */ + __u64 config4; /* extension of config3 */ }; /* * Structure used by below PERF_EVENT_IOC_QUERY_BPF command - * to query bpf programs attached to the same perf tracepoint + * to query BPF programs attached to the same perf tracepoint * as the given perf event. */ struct perf_event_query_bpf { @@ -557,21 +573,21 @@ struct perf_event_query_bpf { /* * Ioctls that can be done on a perf event fd: */ -#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) -#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) -#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) -#define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) -#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) -#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) -#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32) #define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) -#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) +#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *) enum perf_event_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, + PERF_IOC_FLAG_GROUP = 1U << 0, }; /* @@ -582,7 +598,7 @@ struct perf_event_mmap_page { __u32 compat_version; /* lowest version this is compat with */ /* - * Bits needed to read the hw events in user-space. + * Bits needed to read the HW events in user-space. * * u32 seq, time_mult, time_shift, index, width; * u64 count, enabled, running; @@ -620,7 +636,7 @@ struct perf_event_mmap_page { __u32 index; /* hardware event identifier */ __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ - __u64 time_running; /* time event on cpu */ + __u64 time_running; /* time event on CPU */ union { __u64 capabilities; struct { @@ -648,7 +664,7 @@ struct perf_event_mmap_page { /* * If cap_usr_time the below fields can be used to compute the time - * delta since time_enabled (in ns) using rdtsc or similar. + * delta since time_enabled (in ns) using RDTSC or similar. * * u64 quot, rem; * u64 delta; @@ -721,7 +737,7 @@ struct perf_event_mmap_page { * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data, after issueing + * written by user-space to reflect the last read data, after issuing * an smp_mb() to separate the data read from the ->data_tail store. * In this case the kernel will not over-write unread data. * @@ -737,7 +753,7 @@ struct perf_event_mmap_page { /* * AUX area is defined by aux_{offset,size} fields that should be set - * by the userspace, so that + * by the user-space, so that * * aux_offset >= data_offset + data_size * @@ -811,7 +827,7 @@ struct perf_event_mmap_page { * Indicates that thread was preempted in TASK_RUNNING state. * * PERF_RECORD_MISC_MMAP_BUILD_ID: - * Indicates that mmap2 event carries build id data. + * Indicates that mmap2 event carries build ID data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) @@ -822,26 +838,26 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; + __u32 type; + __u16 misc; + __u16 size; }; struct perf_ns_link_info { - __u64 dev; - __u64 ino; + __u64 dev; + __u64 ino; }; enum { - NET_NS_INDEX = 0, - UTS_NS_INDEX = 1, - IPC_NS_INDEX = 2, - PID_NS_INDEX = 3, - USER_NS_INDEX = 4, - MNT_NS_INDEX = 5, - CGROUP_NS_INDEX = 6, - - NR_NAMESPACES, /* number of available namespaces */ + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ }; enum perf_event_type { @@ -857,11 +873,11 @@ enum perf_event_type { * optional fields being ignored. * * struct sample_id { - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 id; } && PERF_SAMPLE_IDENTIFIER * } && perf_event_attr::sample_id_all * @@ -872,7 +888,7 @@ enum perf_event_type { /* * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: + * correlate user-space IPs to code. They have the following structure: * * struct { * struct perf_event_header header; @@ -882,7 +898,7 @@ enum perf_event_type { * u64 len; * u64 pgoff; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP = 1, @@ -892,7 +908,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 id; * u64 lost; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_LOST = 2, @@ -903,7 +919,7 @@ enum perf_event_type { * * u32 pid, tid; * char comm[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_COMM = 3, @@ -914,7 +930,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_EXIT = 4, @@ -925,7 +941,7 @@ enum perf_event_type { * u64 time; * u64 id; * u64 stream_id; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_THROTTLE = 5, @@ -937,7 +953,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_FORK = 7, @@ -948,7 +964,7 @@ enum perf_event_type { * u32 pid, tid; * * struct read_format values; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_READ = 8, @@ -1003,12 +1019,12 @@ enum perf_event_type { * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * - * { u64 abi; # enum perf_sample_regs_abi - * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * - * { u64 size; - * char data[size]; - * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * * { union perf_sample_weight * { @@ -1033,10 +1049,11 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR - * { u64 size; - * char data[size]; } && PERF_SAMPLE_AUX + * { u64 cgroup;} && PERF_SAMPLE_CGROUP * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, @@ -1068,7 +1085,7 @@ enum perf_event_type { * }; * u32 prot, flags; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP2 = 10, @@ -1077,12 +1094,12 @@ enum perf_event_type { * Records that new data landed in the AUX buffer part. * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u64 aux_offset; - * u64 aux_size; + * u64 aux_offset; + * u64 aux_size; * u64 flags; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_AUX = 11, @@ -1165,7 +1182,7 @@ enum perf_event_type { PERF_RECORD_KSYMBOL = 17, /* - * Record bpf events: + * Record BPF events: * enum perf_bpf_event_type { * PERF_BPF_EVENT_UNKNOWN = 0, * PERF_BPF_EVENT_PROG_LOAD = 1, @@ -1226,6 +1243,22 @@ enum perf_event_type { */ PERF_RECORD_AUX_OUTPUT_HW_ID = 21, + /* + * This user callchain capture was deferred until shortly before + * returning to user space. Previous samples would have kernel + * callchains only and they need to be stitched with this to make full + * callchains. + * + * struct { + * struct perf_event_header header; + * u64 cookie; + * u64 nr; + * u64 ips[nr]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CALLCHAIN_DEFERRED = 22, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -1243,181 +1276,203 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) enum perf_bpf_event_type { - PERF_BPF_EVENT_UNKNOWN = 0, - PERF_BPF_EVENT_PROG_LOAD = 1, - PERF_BPF_EVENT_PROG_UNLOAD = 2, - PERF_BPF_EVENT_MAX, /* non-ABI */ + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 127 -#define PERF_MAX_CONTEXTS_PER_STACK 8 +#define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_USER_DEFERRED = (__u64)-640, - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, - PERF_CONTEXT_MAX = (__u64)-4095, + PERF_CONTEXT_MAX = (__u64)-4095, }; /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */ #define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ /* CoreSight PMU AUX buffer formats */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ -#define PERF_FLAG_FD_NO_GROUP (1UL << 0) -#define PERF_FLAG_FD_OUTPUT (1UL << 1) -#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ -#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#define PERF_FLAG_FD_OUTPUT (1UL << 1) +#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */ +#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ #if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_op:5, /* type of opcode */ - mem_lvl:14, /* memory hierarchy level */ - mem_snoop:5, /* snoop mode */ - mem_lock:2, /* lock instr */ - mem_dtlb:7, /* tlb access */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_remote:1, /* remote */ - mem_snoopx:2, /* snoop mode, ext */ - mem_blk:3, /* access blocked */ - mem_hops:3, /* hop level */ - mem_rsvd:18; + __u64 mem_op : 5, /* Type of opcode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_snoop : 5, /* Snoop mode */ + mem_lock : 2, /* Lock instr */ + mem_dtlb : 7, /* TLB access */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_remote : 1, /* Remote */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_blk : 3, /* Access blocked */ + mem_hops : 3, /* Hop level */ + mem_region : 5, /* cache/memory regions */ + mem_rsvd : 13; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:18, - mem_hops:3, /* hop level */ - mem_blk:3, /* access blocked */ - mem_snoopx:2, /* snoop mode, ext */ - mem_remote:1, /* remote */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_dtlb:7, /* tlb access */ - mem_lock:2, /* lock instr */ - mem_snoop:5, /* snoop mode */ - mem_lvl:14, /* memory hierarchy level */ - mem_op:5; /* type of opcode */ + __u64 mem_rsvd : 13, + mem_region : 5, /* cache/memory regions */ + mem_hops : 3, /* Hop level */ + mem_blk : 3, /* Access blocked */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_remote : 1, /* Remote */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_dtlb : 7, /* TLB access */ + mem_lock : 2, /* Lock instr */ + mem_snoop : 5, /* Snoop mode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_op : 5; /* Type of opcode */ }; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif -/* type of opcode (load/store/prefetch,code) */ -#define PERF_MEM_OP_NA 0x01 /* not available */ -#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ -#define PERF_MEM_OP_STORE 0x04 /* store instruction */ -#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ -#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ -#define PERF_MEM_OP_SHIFT 0 +/* Type of memory opcode: */ +#define PERF_MEM_OP_NA 0x0001 /* Not available */ +#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */ +#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */ +#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */ +#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */ +#define PERF_MEM_OP_SHIFT 0 /* - * PERF_MEM_LVL_* namespace being depricated to some extent in the + * The PERF_MEM_LVL_* namespace is being deprecated to some extent in * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. - * Supporting this namespace inorder to not break defined ABIs. + * We support this namespace in order to not break defined ABIs. * - * memory hierarchy (memory level, hit or miss) + * Memory hierarchy (memory level, hit or miss) */ -#define PERF_MEM_LVL_NA 0x01 /* not available */ -#define PERF_MEM_LVL_HIT 0x02 /* hit level */ -#define PERF_MEM_LVL_MISS 0x04 /* miss level */ -#define PERF_MEM_LVL_L1 0x08 /* L1 */ -#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ -#define PERF_MEM_LVL_L2 0x20 /* L2 */ -#define PERF_MEM_LVL_L3 0x40 /* L3 */ -#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ -#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ -#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ -#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ -#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ -#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ -#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ -#define PERF_MEM_LVL_SHIFT 5 - -#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ -#define PERF_MEM_REMOTE_SHIFT 37 - -#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ -#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ -#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ -#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ -/* 0x7 available */ -#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ -#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ -#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ -#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ -#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ - -#define PERF_MEM_LVLNUM_SHIFT 33 - -/* snoop mode */ -#define PERF_MEM_SNOOP_NA 0x01 /* not available */ -#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ -#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ -#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ -#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ -#define PERF_MEM_SNOOP_SHIFT 19 - -#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ -#define PERF_MEM_SNOOPX_SHIFT 38 - -/* locked instruction */ -#define PERF_MEM_LOCK_NA 0x01 /* not available */ -#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ -#define PERF_MEM_LOCK_SHIFT 24 +#define PERF_MEM_LVL_NA 0x0001 /* Not available */ +#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */ +#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */ +#define PERF_MEM_LVL_L1 0x0008 /* L1 */ +#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x0020 /* L2 */ +#define PERF_MEM_LVL_L3 0x0040 /* L3 */ +#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ +#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ +#define PERF_MEM_LVLNUM_L0 0x0007 /* L0 */ +#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ +#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + +/* Snoop mode */ +#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */ +#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */ +#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */ +#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */ +#define PERF_MEM_SNOOPX_SHIFT 38 + +/* Locked instruction */ +#define PERF_MEM_LOCK_NA 0x0001 /* Not available */ +#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 /* TLB access */ -#define PERF_MEM_TLB_NA 0x01 /* not available */ -#define PERF_MEM_TLB_HIT 0x02 /* hit level */ -#define PERF_MEM_TLB_MISS 0x04 /* miss level */ -#define PERF_MEM_TLB_L1 0x08 /* L1 */ -#define PERF_MEM_TLB_L2 0x10 /* L2 */ -#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ -#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ -#define PERF_MEM_TLB_SHIFT 26 +#define PERF_MEM_TLB_NA 0x0001 /* Not available */ +#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */ +#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */ +#define PERF_MEM_TLB_L1 0x0008 /* L1 */ +#define PERF_MEM_TLB_L2 0x0010 /* L2 */ +#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 26 /* Access blocked */ -#define PERF_MEM_BLK_NA 0x01 /* not available */ -#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ -#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ -#define PERF_MEM_BLK_SHIFT 40 - -/* hop level */ -#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ -#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ -#define PERF_MEM_HOPS_3 0x04 /* remote board */ +#define PERF_MEM_BLK_NA 0x0001 /* Not available */ +#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + +/* Hop level */ +#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */ +#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */ +#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */ +#define PERF_MEM_HOPS_3 0x0004 /* Remote board */ /* 5-7 available */ -#define PERF_MEM_HOPS_SHIFT 43 +#define PERF_MEM_HOPS_SHIFT 43 + +/* Cache/Memory region */ +#define PERF_MEM_REGION_NA 0x0 /* Invalid */ +#define PERF_MEM_REGION_RSVD 0x01 /* Reserved */ +#define PERF_MEM_REGION_L_SHARE 0x02 /* Local CA shared cache */ +#define PERF_MEM_REGION_L_NON_SHARE 0x03 /* Local CA non-shared cache */ +#define PERF_MEM_REGION_O_IO 0x04 /* Other CA IO agent */ +#define PERF_MEM_REGION_O_SHARE 0x05 /* Other CA shared cache */ +#define PERF_MEM_REGION_O_NON_SHARE 0x06 /* Other CA non-shared cache */ +#define PERF_MEM_REGION_MMIO 0x07 /* MMIO */ +#define PERF_MEM_REGION_MEM0 0x08 /* Memory region 0 */ +#define PERF_MEM_REGION_MEM1 0x09 /* Memory region 1 */ +#define PERF_MEM_REGION_MEM2 0x0a /* Memory region 2 */ +#define PERF_MEM_REGION_MEM3 0x0b /* Memory region 3 */ +#define PERF_MEM_REGION_MEM4 0x0c /* Memory region 4 */ +#define PERF_MEM_REGION_MEM5 0x0d /* Memory region 5 */ +#define PERF_MEM_REGION_MEM6 0x0e /* Memory region 6 */ +#define PERF_MEM_REGION_MEM7 0x0f /* Memory region 7 */ +#define PERF_MEM_REGION_SHIFT 46 #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) /* - * single taken branch record layout: + * Layout of single taken branch records: * * from: source instruction (may not always be a branch insn) * to: branch target @@ -1436,37 +1491,37 @@ union perf_mem_data_src { struct perf_branch_entry { __u64 from; __u64 to; - __u64 mispred:1, /* target mispredicted */ - predicted:1,/* target predicted */ - in_tx:1, /* in transaction */ - abort:1, /* transaction abort */ - cycles:16, /* cycle count to last branch */ - type:4, /* branch type */ - spec:2, /* branch speculation info */ - new_type:4, /* additional branch type */ - priv:3, /* privilege level */ - reserved:31; + __u64 mispred : 1, /* target mispredicted */ + predicted : 1, /* target predicted */ + in_tx : 1, /* in transaction */ + abort : 1, /* transaction abort */ + cycles : 16, /* cycle count to last branch */ + type : 4, /* branch type */ + spec : 2, /* branch speculation info */ + new_type : 4, /* additional branch type */ + priv : 3, /* privilege level */ + reserved : 31; }; /* Size of used info bits in struct perf_branch_entry */ #define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 union perf_sample_weight { - __u64 full; + __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) struct { - __u32 var1_dw; - __u16 var2_w; - __u16 var3_w; + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; }; #elif defined(__BIG_ENDIAN_BITFIELD) struct { - __u16 var3_w; - __u16 var2_w; - __u32 var1_dw; + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif }; diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 35791791a879..475fc8ca4403 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -230,7 +230,7 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) -/* Tagged user address controls for arm64 */ +/* Tagged user address controls for arm64 and RISC-V */ #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) @@ -244,6 +244,9 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* RISC-V pointer masking tag length */ +# define PR_PMLEN_SHIFT 24 +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 @@ -252,7 +255,12 @@ struct prctl_mm_map { /* Dispatch syscalls to a userspace handler */ #define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 -# define PR_SYS_DISPATCH_ON 1 +/* Enable dispatch except for the specified range */ +# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1 +/* Enable dispatch for the specified range */ +# define PR_SYS_DISPATCH_INCLUSIVE_ON 2 +/* Legacy name for backwards compatibility */ +# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON /* The control values for the user space selector when dispatch is enabled */ # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 @@ -328,4 +336,42 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define PR_FUTEX_HASH_GET_SLOTS 2 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/linux/rtnetlink.h b/tools/include/uapi/linux/rtnetlink.h new file mode 100644 index 000000000000..dab9493c791b --- /dev/null +++ b/tools/include/uapi/linux/rtnetlink.h @@ -0,0 +1,848 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_RTNETLINK_H +#define _UAPI__LINUX_RTNETLINK_H + +#include <linux/types.h> +#include <linux/netlink.h> +#include <linux/if_link.h> +#include <linux/if_addr.h> +#include <linux/neighbour.h> + +/* rtnetlink families. Values up to 127 are reserved for real address + * families, values above 128 may be used arbitrarily. + */ +#define RTNL_FAMILY_IPMR 128 +#define RTNL_FAMILY_IP6MR 129 +#define RTNL_FAMILY_MAX 129 + +/**** + * Routing/neighbour discovery messages. + ****/ + +/* Types of messages */ + +enum { + RTM_BASE = 16, +#define RTM_BASE RTM_BASE + + RTM_NEWLINK = 16, +#define RTM_NEWLINK RTM_NEWLINK + RTM_DELLINK, +#define RTM_DELLINK RTM_DELLINK + RTM_GETLINK, +#define RTM_GETLINK RTM_GETLINK + RTM_SETLINK, +#define RTM_SETLINK RTM_SETLINK + + RTM_NEWADDR = 20, +#define RTM_NEWADDR RTM_NEWADDR + RTM_DELADDR, +#define RTM_DELADDR RTM_DELADDR + RTM_GETADDR, +#define RTM_GETADDR RTM_GETADDR + + RTM_NEWROUTE = 24, +#define RTM_NEWROUTE RTM_NEWROUTE + RTM_DELROUTE, +#define RTM_DELROUTE RTM_DELROUTE + RTM_GETROUTE, +#define RTM_GETROUTE RTM_GETROUTE + + RTM_NEWNEIGH = 28, +#define RTM_NEWNEIGH RTM_NEWNEIGH + RTM_DELNEIGH, +#define RTM_DELNEIGH RTM_DELNEIGH + RTM_GETNEIGH, +#define RTM_GETNEIGH RTM_GETNEIGH + + RTM_NEWRULE = 32, +#define RTM_NEWRULE RTM_NEWRULE + RTM_DELRULE, +#define RTM_DELRULE RTM_DELRULE + RTM_GETRULE, +#define RTM_GETRULE RTM_GETRULE + + RTM_NEWQDISC = 36, +#define RTM_NEWQDISC RTM_NEWQDISC + RTM_DELQDISC, +#define RTM_DELQDISC RTM_DELQDISC + RTM_GETQDISC, +#define RTM_GETQDISC RTM_GETQDISC + + RTM_NEWTCLASS = 40, +#define RTM_NEWTCLASS RTM_NEWTCLASS + RTM_DELTCLASS, +#define RTM_DELTCLASS RTM_DELTCLASS + RTM_GETTCLASS, +#define RTM_GETTCLASS RTM_GETTCLASS + + RTM_NEWTFILTER = 44, +#define RTM_NEWTFILTER RTM_NEWTFILTER + RTM_DELTFILTER, +#define RTM_DELTFILTER RTM_DELTFILTER + RTM_GETTFILTER, +#define RTM_GETTFILTER RTM_GETTFILTER + + RTM_NEWACTION = 48, +#define RTM_NEWACTION RTM_NEWACTION + RTM_DELACTION, +#define RTM_DELACTION RTM_DELACTION + RTM_GETACTION, +#define RTM_GETACTION RTM_GETACTION + + RTM_NEWPREFIX = 52, +#define RTM_NEWPREFIX RTM_NEWPREFIX + + RTM_NEWMULTICAST = 56, +#define RTM_NEWMULTICAST RTM_NEWMULTICAST + RTM_DELMULTICAST, +#define RTM_DELMULTICAST RTM_DELMULTICAST + RTM_GETMULTICAST, +#define RTM_GETMULTICAST RTM_GETMULTICAST + + RTM_NEWANYCAST = 60, +#define RTM_NEWANYCAST RTM_NEWANYCAST + RTM_DELANYCAST, +#define RTM_DELANYCAST RTM_DELANYCAST + RTM_GETANYCAST, +#define RTM_GETANYCAST RTM_GETANYCAST + + RTM_NEWNEIGHTBL = 64, +#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL + RTM_GETNEIGHTBL = 66, +#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL + RTM_SETNEIGHTBL, +#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL + + RTM_NEWNDUSEROPT = 68, +#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT + + RTM_NEWADDRLABEL = 72, +#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL + RTM_DELADDRLABEL, +#define RTM_DELADDRLABEL RTM_DELADDRLABEL + RTM_GETADDRLABEL, +#define RTM_GETADDRLABEL RTM_GETADDRLABEL + + RTM_GETDCB = 78, +#define RTM_GETDCB RTM_GETDCB + RTM_SETDCB, +#define RTM_SETDCB RTM_SETDCB + + RTM_NEWNETCONF = 80, +#define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_DELNETCONF, +#define RTM_DELNETCONF RTM_DELNETCONF + RTM_GETNETCONF = 82, +#define RTM_GETNETCONF RTM_GETNETCONF + + RTM_NEWMDB = 84, +#define RTM_NEWMDB RTM_NEWMDB + RTM_DELMDB = 85, +#define RTM_DELMDB RTM_DELMDB + RTM_GETMDB = 86, +#define RTM_GETMDB RTM_GETMDB + + RTM_NEWNSID = 88, +#define RTM_NEWNSID RTM_NEWNSID + RTM_DELNSID = 89, +#define RTM_DELNSID RTM_DELNSID + RTM_GETNSID = 90, +#define RTM_GETNSID RTM_GETNSID + + RTM_NEWSTATS = 92, +#define RTM_NEWSTATS RTM_NEWSTATS + RTM_GETSTATS = 94, +#define RTM_GETSTATS RTM_GETSTATS + RTM_SETSTATS, +#define RTM_SETSTATS RTM_SETSTATS + + RTM_NEWCACHEREPORT = 96, +#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT + + RTM_NEWCHAIN = 100, +#define RTM_NEWCHAIN RTM_NEWCHAIN + RTM_DELCHAIN, +#define RTM_DELCHAIN RTM_DELCHAIN + RTM_GETCHAIN, +#define RTM_GETCHAIN RTM_GETCHAIN + + RTM_NEWNEXTHOP = 104, +#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP + RTM_DELNEXTHOP, +#define RTM_DELNEXTHOP RTM_DELNEXTHOP + RTM_GETNEXTHOP, +#define RTM_GETNEXTHOP RTM_GETNEXTHOP + + RTM_NEWLINKPROP = 108, +#define RTM_NEWLINKPROP RTM_NEWLINKPROP + RTM_DELLINKPROP, +#define RTM_DELLINKPROP RTM_DELLINKPROP + RTM_GETLINKPROP, +#define RTM_GETLINKPROP RTM_GETLINKPROP + + RTM_NEWVLAN = 112, +#define RTM_NEWVLAN RTM_NEWVLAN + RTM_DELVLAN, +#define RTM_DELVLAN RTM_DELVLAN + RTM_GETVLAN, +#define RTM_GETVLAN RTM_GETVLAN + + RTM_NEWNEXTHOPBUCKET = 116, +#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET + RTM_DELNEXTHOPBUCKET, +#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET + RTM_GETNEXTHOPBUCKET, +#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET + + RTM_NEWTUNNEL = 120, +#define RTM_NEWTUNNEL RTM_NEWTUNNEL + RTM_DELTUNNEL, +#define RTM_DELTUNNEL RTM_DELTUNNEL + RTM_GETTUNNEL, +#define RTM_GETTUNNEL RTM_GETTUNNEL + + __RTM_MAX, +#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) +}; + +#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE) +#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2) +#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2) + +/* + Generic structure for encapsulation of optional route information. + It is reminiscent of sockaddr, but with sa_family replaced + with attribute type. + */ + +struct rtattr { + unsigned short rta_len; + unsigned short rta_type; +}; + +/* Macros to handle rtattributes */ + +#define RTA_ALIGNTO 4U +#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) ) +#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \ + (rta)->rta_len >= sizeof(struct rtattr) && \ + (rta)->rta_len <= (len)) +#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \ + (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len))) +#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len)) +#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len)) +#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0))) +#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0)) + + + + +/****************************************************************************** + * Definitions used in routing table administration. + ****/ + +struct rtmsg { + unsigned char rtm_family; + unsigned char rtm_dst_len; + unsigned char rtm_src_len; + unsigned char rtm_tos; + + unsigned char rtm_table; /* Routing table id */ + unsigned char rtm_protocol; /* Routing protocol; see below */ + unsigned char rtm_scope; /* See below */ + unsigned char rtm_type; /* See below */ + + unsigned rtm_flags; +}; + +/* rtm_type */ + +enum { + RTN_UNSPEC, + RTN_UNICAST, /* Gateway or direct route */ + RTN_LOCAL, /* Accept locally */ + RTN_BROADCAST, /* Accept locally as broadcast, + send as broadcast */ + RTN_ANYCAST, /* Accept locally as broadcast, + but send as unicast */ + RTN_MULTICAST, /* Multicast route */ + RTN_BLACKHOLE, /* Drop */ + RTN_UNREACHABLE, /* Destination is unreachable */ + RTN_PROHIBIT, /* Administratively prohibited */ + RTN_THROW, /* Not in this table */ + RTN_NAT, /* Translate this address */ + RTN_XRESOLVE, /* Use external resolver */ + __RTN_MAX +}; + +#define RTN_MAX (__RTN_MAX - 1) + + +/* rtm_protocol */ + +#define RTPROT_UNSPEC 0 +#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; + not used by current IPv4 */ +#define RTPROT_KERNEL 2 /* Route installed by kernel */ +#define RTPROT_BOOT 3 /* Route installed during boot */ +#define RTPROT_STATIC 4 /* Route installed by administrator */ + +/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel; + they are just passed from user and back as is. + It will be used by hypothetical multiple routing daemons. + Note that protocol values should be standardized in order to + avoid conflicts. + */ + +#define RTPROT_GATED 8 /* Apparently, GateD */ +#define RTPROT_RA 9 /* RDISC/ND router advertisements */ +#define RTPROT_MRT 10 /* Merit MRT */ +#define RTPROT_ZEBRA 11 /* Zebra */ +#define RTPROT_BIRD 12 /* BIRD */ +#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ +#define RTPROT_XORP 14 /* XORP */ +#define RTPROT_NTK 15 /* Netsukuku */ +#define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ +#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */ +#define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_OVN 84 /* OVN daemon */ +#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */ +#define RTPROT_BGP 186 /* BGP Routes */ +#define RTPROT_ISIS 187 /* ISIS Routes */ +#define RTPROT_OSPF 188 /* OSPF Routes */ +#define RTPROT_RIP 189 /* RIP Routes */ +#define RTPROT_EIGRP 192 /* EIGRP Routes */ + +/* rtm_scope + + Really it is not scope, but sort of distance to the destination. + NOWHERE are reserved for not existing destinations, HOST is our + local addresses, LINK are destinations, located on directly attached + link and UNIVERSE is everywhere in the Universe. + + Intermediate values are also possible f.e. interior routes + could be assigned a value between UNIVERSE and LINK. +*/ + +enum rt_scope_t { + RT_SCOPE_UNIVERSE=0, +/* User defined values */ + RT_SCOPE_SITE=200, + RT_SCOPE_LINK=253, + RT_SCOPE_HOST=254, + RT_SCOPE_NOWHERE=255 +}; + +/* rtm_flags */ + +#define RTM_F_NOTIFY 0x100 /* Notify user of route change */ +#define RTM_F_CLONED 0x200 /* This route is cloned */ +#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */ +#define RTM_F_PREFIX 0x800 /* Prefix addresses */ +#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */ +#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */ +#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */ +#define RTM_F_TRAP 0x8000 /* route is trapping packets */ +#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value + * is chosen to avoid conflicts with + * other flags defined in + * include/uapi/linux/ipv6_route.h + */ + +/* Reserved table identifiers */ + +enum rt_class_t { + RT_TABLE_UNSPEC=0, +/* User defined values */ + RT_TABLE_COMPAT=252, + RT_TABLE_DEFAULT=253, + RT_TABLE_MAIN=254, + RT_TABLE_LOCAL=255, + RT_TABLE_MAX=0xFFFFFFFF +}; + + +/* Routing message attributes */ + +enum rtattr_type_t { + RTA_UNSPEC, + RTA_DST, + RTA_SRC, + RTA_IIF, + RTA_OIF, + RTA_GATEWAY, + RTA_PRIORITY, + RTA_PREFSRC, + RTA_METRICS, + RTA_MULTIPATH, + RTA_PROTOINFO, /* no longer used */ + RTA_FLOW, + RTA_CACHEINFO, + RTA_SESSION, /* no longer used */ + RTA_MP_ALGO, /* no longer used */ + RTA_TABLE, + RTA_MARK, + RTA_MFC_STATS, + RTA_VIA, + RTA_NEWDST, + RTA_PREF, + RTA_ENCAP_TYPE, + RTA_ENCAP, + RTA_EXPIRES, + RTA_PAD, + RTA_UID, + RTA_TTL_PROPAGATE, + RTA_IP_PROTO, + RTA_SPORT, + RTA_DPORT, + RTA_NH_ID, + RTA_FLOWLABEL, + __RTA_MAX +}; + +#define RTA_MAX (__RTA_MAX - 1) + +#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg)))) +#define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg)) + +/* RTM_MULTIPATH --- array of struct rtnexthop. + * + * "struct rtnexthop" describes all necessary nexthop information, + * i.e. parameters of path to a destination via this nexthop. + * + * At the moment it is impossible to set different prefsrc, mtu, window + * and rtt for different paths from multipath. + */ + +struct rtnexthop { + unsigned short rtnh_len; + unsigned char rtnh_flags; + unsigned char rtnh_hops; + int rtnh_ifindex; +}; + +/* rtnh_flags */ + +#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ +#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ +#define RTNH_F_ONLINK 4 /* Gateway is forced on link */ +#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */ +#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ +#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */ +#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */ + +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \ + RTNH_F_OFFLOAD | RTNH_F_TRAP) + +/* Macros to handle hexthops */ + +#define RTNH_ALIGNTO 4 +#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) ) +#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \ + ((int)(rtnh)->rtnh_len) <= (len)) +#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len))) +#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len)) +#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len)) +#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0))) + +/* RTA_VIA */ +struct rtvia { + __kernel_sa_family_t rtvia_family; + __u8 rtvia_addr[]; +}; + +/* RTM_CACHEINFO */ + +struct rta_cacheinfo { + __u32 rta_clntref; + __u32 rta_lastuse; + __s32 rta_expires; + __u32 rta_error; + __u32 rta_used; + +#define RTNETLINK_HAVE_PEERINFO 1 + __u32 rta_id; + __u32 rta_ts; + __u32 rta_tsage; +}; + +/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */ + +enum { + RTAX_UNSPEC, +#define RTAX_UNSPEC RTAX_UNSPEC + RTAX_LOCK, +#define RTAX_LOCK RTAX_LOCK + RTAX_MTU, +#define RTAX_MTU RTAX_MTU + RTAX_WINDOW, +#define RTAX_WINDOW RTAX_WINDOW + RTAX_RTT, +#define RTAX_RTT RTAX_RTT + RTAX_RTTVAR, +#define RTAX_RTTVAR RTAX_RTTVAR + RTAX_SSTHRESH, +#define RTAX_SSTHRESH RTAX_SSTHRESH + RTAX_CWND, +#define RTAX_CWND RTAX_CWND + RTAX_ADVMSS, +#define RTAX_ADVMSS RTAX_ADVMSS + RTAX_REORDERING, +#define RTAX_REORDERING RTAX_REORDERING + RTAX_HOPLIMIT, +#define RTAX_HOPLIMIT RTAX_HOPLIMIT + RTAX_INITCWND, +#define RTAX_INITCWND RTAX_INITCWND + RTAX_FEATURES, +#define RTAX_FEATURES RTAX_FEATURES + RTAX_RTO_MIN, +#define RTAX_RTO_MIN RTAX_RTO_MIN + RTAX_INITRWND, +#define RTAX_INITRWND RTAX_INITRWND + RTAX_QUICKACK, +#define RTAX_QUICKACK RTAX_QUICKACK + RTAX_CC_ALGO, +#define RTAX_CC_ALGO RTAX_CC_ALGO + RTAX_FASTOPEN_NO_COOKIE, +#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE + __RTAX_MAX +}; + +#define RTAX_MAX (__RTAX_MAX - 1) + +#define RTAX_FEATURE_ECN (1 << 0) +#define RTAX_FEATURE_SACK (1 << 1) /* unused */ +#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */ +#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */ +#define RTAX_FEATURE_TCP_USEC_TS (1 << 4) + +#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \ + RTAX_FEATURE_SACK | \ + RTAX_FEATURE_TIMESTAMP | \ + RTAX_FEATURE_ALLFRAG | \ + RTAX_FEATURE_TCP_USEC_TS) + +struct rta_session { + __u8 proto; + __u8 pad1; + __u16 pad2; + + union { + struct { + __u16 sport; + __u16 dport; + } ports; + + struct { + __u8 type; + __u8 code; + __u16 ident; + } icmpt; + + __u32 spi; + } u; +}; + +struct rta_mfc_stats { + __u64 mfcs_packets; + __u64 mfcs_bytes; + __u64 mfcs_wrong_if; +}; + +/**** + * General form of address family dependent message. + ****/ + +struct rtgenmsg { + unsigned char rtgen_family; +}; + +/***************************************************************** + * Link layer specific messages. + ****/ + +/* struct ifinfomsg + * passes link level specific information, not dependent + * on network protocol. + */ + +struct ifinfomsg { + unsigned char ifi_family; + unsigned char __ifi_pad; + unsigned short ifi_type; /* ARPHRD_* */ + int ifi_index; /* Link index */ + unsigned ifi_flags; /* IFF_* flags */ + unsigned ifi_change; /* IFF_* change mask */ +}; + +/******************************************************************** + * prefix information + ****/ + +struct prefixmsg { + unsigned char prefix_family; + unsigned char prefix_pad1; + unsigned short prefix_pad2; + int prefix_ifindex; + unsigned char prefix_type; + unsigned char prefix_len; + unsigned char prefix_flags; + unsigned char prefix_pad3; +}; + +enum +{ + PREFIX_UNSPEC, + PREFIX_ADDRESS, + PREFIX_CACHEINFO, + __PREFIX_MAX +}; + +#define PREFIX_MAX (__PREFIX_MAX - 1) + +struct prefix_cacheinfo { + __u32 preferred_time; + __u32 valid_time; +}; + + +/***************************************************************** + * Traffic control messages. + ****/ + +struct tcmsg { + unsigned char tcm_family; + unsigned char tcm__pad1; + unsigned short tcm__pad2; + int tcm_ifindex; + __u32 tcm_handle; + __u32 tcm_parent; +/* tcm_block_index is used instead of tcm_parent + * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK + */ +#define tcm_block_index tcm_parent + __u32 tcm_info; +}; + +/* For manipulation of filters in shared block, tcm_ifindex is set to + * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index + * which is the block index. + */ +#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU) + +enum { + TCA_UNSPEC, + TCA_KIND, + TCA_OPTIONS, + TCA_STATS, + TCA_XSTATS, + TCA_RATE, + TCA_FCNT, + TCA_STATS2, + TCA_STAB, + TCA_PAD, + TCA_DUMP_INVISIBLE, + TCA_CHAIN, + TCA_HW_OFFLOAD, + TCA_INGRESS_BLOCK, + TCA_EGRESS_BLOCK, + TCA_DUMP_FLAGS, + TCA_EXT_WARN_MSG, + __TCA_MAX +}; + +#define TCA_MAX (__TCA_MAX - 1) + +#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic + * data necessary to identify the objects + * (handle, cookie, etc.) and stats. + */ + +#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) +#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) + +/******************************************************************** + * Neighbor Discovery userland options + ****/ + +struct nduseroptmsg { + unsigned char nduseropt_family; + unsigned char nduseropt_pad1; + unsigned short nduseropt_opts_len; /* Total length of options */ + int nduseropt_ifindex; + __u8 nduseropt_icmp_type; + __u8 nduseropt_icmp_code; + unsigned short nduseropt_pad2; + unsigned int nduseropt_pad3; + /* Followed by one or more ND options */ +}; + +enum { + NDUSEROPT_UNSPEC, + NDUSEROPT_SRCADDR, + __NDUSEROPT_MAX +}; + +#define NDUSEROPT_MAX (__NDUSEROPT_MAX - 1) + +#ifndef __KERNEL__ +/* RTnetlink multicast groups - backwards compatibility for userspace */ +#define RTMGRP_LINK 1 +#define RTMGRP_NOTIFY 2 +#define RTMGRP_NEIGH 4 +#define RTMGRP_TC 8 + +#define RTMGRP_IPV4_IFADDR 0x10 +#define RTMGRP_IPV4_MROUTE 0x20 +#define RTMGRP_IPV4_ROUTE 0x40 +#define RTMGRP_IPV4_RULE 0x80 + +#define RTMGRP_IPV6_IFADDR 0x100 +#define RTMGRP_IPV6_MROUTE 0x200 +#define RTMGRP_IPV6_ROUTE 0x400 +#define RTMGRP_IPV6_IFINFO 0x800 + +#define RTMGRP_DECnet_IFADDR 0x1000 +#define RTMGRP_DECnet_ROUTE 0x4000 + +#define RTMGRP_IPV6_PREFIX 0x20000 +#endif + +/* RTnetlink multicast groups */ +enum rtnetlink_groups { + RTNLGRP_NONE, +#define RTNLGRP_NONE RTNLGRP_NONE + RTNLGRP_LINK, +#define RTNLGRP_LINK RTNLGRP_LINK + RTNLGRP_NOTIFY, +#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY + RTNLGRP_NEIGH, +#define RTNLGRP_NEIGH RTNLGRP_NEIGH + RTNLGRP_TC, +#define RTNLGRP_TC RTNLGRP_TC + RTNLGRP_IPV4_IFADDR, +#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR + RTNLGRP_IPV4_MROUTE, +#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE + RTNLGRP_IPV4_ROUTE, +#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE + RTNLGRP_IPV4_RULE, +#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE + RTNLGRP_IPV6_IFADDR, +#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR + RTNLGRP_IPV6_MROUTE, +#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE + RTNLGRP_IPV6_ROUTE, +#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE + RTNLGRP_IPV6_IFINFO, +#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO + RTNLGRP_DECnet_IFADDR, +#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR + RTNLGRP_NOP2, + RTNLGRP_DECnet_ROUTE, +#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE + RTNLGRP_DECnet_RULE, +#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE + RTNLGRP_NOP4, + RTNLGRP_IPV6_PREFIX, +#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + RTNLGRP_IPV6_RULE, +#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE + RTNLGRP_ND_USEROPT, +#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT + RTNLGRP_PHONET_IFADDR, +#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR + RTNLGRP_PHONET_ROUTE, +#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE + RTNLGRP_DCB, +#define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV4_NETCONF, +#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF + RTNLGRP_IPV6_NETCONF, +#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF + RTNLGRP_MDB, +#define RTNLGRP_MDB RTNLGRP_MDB + RTNLGRP_MPLS_ROUTE, +#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE + RTNLGRP_NSID, +#define RTNLGRP_NSID RTNLGRP_NSID + RTNLGRP_MPLS_NETCONF, +#define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF + RTNLGRP_IPV4_MROUTE_R, +#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R + RTNLGRP_IPV6_MROUTE_R, +#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R + RTNLGRP_NEXTHOP, +#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP + RTNLGRP_BRVLAN, +#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN + RTNLGRP_MCTP_IFADDR, +#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR + RTNLGRP_TUNNEL, +#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL + RTNLGRP_STATS, +#define RTNLGRP_STATS RTNLGRP_STATS + RTNLGRP_IPV4_MCADDR, +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR + RTNLGRP_IPV6_MCADDR, +#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR + RTNLGRP_IPV6_ACADDR, +#define RTNLGRP_IPV6_ACADDR RTNLGRP_IPV6_ACADDR + __RTNLGRP_MAX +}; +#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) + +/* TC action piece */ +struct tcamsg { + unsigned char tca_family; + unsigned char tca__pad1; + unsigned short tca__pad2; +}; + +enum { + TCA_ROOT_UNSPEC, + TCA_ROOT_TAB, +#define TCA_ACT_TAB TCA_ROOT_TAB +#define TCAA_MAX TCA_ROOT_TAB + TCA_ROOT_FLAGS, + TCA_ROOT_COUNT, + TCA_ROOT_TIME_DELTA, /* in msecs */ + TCA_ROOT_EXT_WARN_MSG, + __TCA_ROOT_MAX, +#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) +}; + +#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg)))) +#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) +/* tcamsg flags stored in attribute TCA_ROOT_FLAGS + * + * TCA_ACT_FLAG_LARGE_DUMP_ON user->kernel to request for larger than + * TCA_ACT_MAX_PRIO actions in a dump. All dump responses will contain the + * number of actions being dumped stored in for user app's consumption in + * TCA_ROOT_COUNT + * + * TCA_ACT_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only + * includes essential action info (kind, index, etc.) + * + */ +#define TCA_FLAG_LARGE_DUMP_ON (1 << 0) +#define TCA_ACT_FLAG_LARGE_DUMP_ON TCA_FLAG_LARGE_DUMP_ON +#define TCA_ACT_FLAG_TERSE_DUMP (1 << 1) + +/* New extended info filters for IFLA_EXT_MASK */ +#define RTEXT_FILTER_VF (1 << 0) +#define RTEXT_FILTER_BRVLAN (1 << 1) +#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) +#define RTEXT_FILTER_SKIP_STATS (1 << 3) +#define RTEXT_FILTER_MRP (1 << 4) +#define RTEXT_FILTER_CFM_CONFIG (1 << 5) +#define RTEXT_FILTER_CFM_STATUS (1 << 6) +#define RTEXT_FILTER_MST (1 << 7) + +/* End of information exported to user level */ + + + +#endif /* _UAPI__LINUX_RTNETLINK_H */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 887a25286441..1686861aae20 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -98,43 +98,97 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +218,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h index c53cde425406..457498259494 100644 --- a/tools/include/uapi/linux/stddef.h +++ b/tools/include/uapi/linux/stddef.h @@ -3,7 +3,6 @@ #define _LINUX_STDDEF_H - #ifndef __always_inline #define __always_inline __inline__ #endif @@ -36,6 +35,11 @@ struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ } ATTRS +#ifdef __cplusplus +/* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */ +#define __DECLARE_FLEX_ARRAY(T, member) \ + T member[0] +#else /** * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union * @@ -52,3 +56,23 @@ TYPE NAME[]; \ } #endif + +#ifndef __counted_by +#define __counted_by(m) +#endif + +#ifndef __counted_by_le +#define __counted_by_le(m) +#endif + +#ifndef __counted_by_be +#define __counted_by_be(m) +#endif + +#ifndef __counted_by_ptr +#define __counted_by_ptr(m) +#endif + +#define __kernel_nonstring + +#endif /* _LINUX_STDDEF_H */ diff --git a/tools/include/uapi/linux/taskstats.h b/tools/include/uapi/linux/taskstats.h new file mode 100644 index 000000000000..3ae25f3ce067 --- /dev/null +++ b/tools/include/uapi/linux/taskstats.h @@ -0,0 +1,291 @@ +/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */ +/* taskstats.h - exporting per-task statistics + * + * Copyright (C) Shailabh Nagar, IBM Corp. 2006 + * (C) Balbir Singh, IBM Corp. 2006 + * (C) Jay Lan, SGI, 2006 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef _LINUX_TASKSTATS_H +#define _LINUX_TASKSTATS_H + +#include <linux/types.h> +#include <linux/time_types.h> + +/* Format for per-task data returned to userland when + * - a task exits + * - listener requests stats for a task + * + * The struct is versioned. Newer versions should only add fields to + * the bottom of the struct to maintain backward compatibility. + * + * + * To add new fields + * a) bump up TASKSTATS_VERSION + * b) add comment indicating new version number at end of struct + * c) add new fields after version comment; maintain 64-bit alignment + */ + + +#define TASKSTATS_VERSION 17 +#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN + * in linux/sched.h */ + +struct taskstats { + + /* The version number of this struct. This field is always set to + * TAKSTATS_VERSION, which is defined in <linux/taskstats.h>. + * Each time the struct is changed, the value should be incremented. + */ + __u16 version; + __u32 ac_exitcode; /* Exit status */ + + /* The accounting flags of a task as defined in <linux/acct.h> + * Defined values are AFORK, ASU, ACOMPAT, ACORE, AXSIG, and AGROUP. + * (AGROUP since version 12). + */ + __u8 ac_flag; /* Record flags */ + __u8 ac_nice; /* task_nice */ + + /* Delay accounting fields start + * + * All values, until comment "Delay accounting fields end" are + * available only if delay accounting is enabled, even though the last + * few fields are not delays + * + * xxx_count is the number of delay values recorded + * xxx_delay_total is the corresponding cumulative delay in nanoseconds + * + * xxx_delay_total wraps around to zero on overflow + * xxx_count incremented regardless of overflow + */ + + /* Delay waiting for cpu, while runnable + * count, delay_total NOT updated atomically + */ + __u64 cpu_count __attribute__((aligned(8))); + __u64 cpu_delay_total; + + /* Following four fields atomically updated using task->delays->lock */ + + /* Delay waiting for synchronous block I/O to complete + * does not account for delays in I/O submission + */ + __u64 blkio_count; + __u64 blkio_delay_total; + + /* Delay waiting for page fault I/O (swap in only) */ + __u64 swapin_count; + __u64 swapin_delay_total; + + /* cpu "wall-clock" running time + * On some architectures, value will adjust for cpu time stolen + * from the kernel in involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_real_total; + + /* cpu "virtual" running time + * Uses time intervals seen by the kernel i.e. no adjustment + * for kernel's involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_virtual_total; + /* Delay accounting fields end */ + /* version 1 ends here */ + + /* Basic Accounting Fields start */ + char ac_comm[TS_COMM_LEN]; /* Command name */ + __u8 ac_sched __attribute__((aligned(8))); + /* Scheduling discipline */ + __u8 ac_pad[3]; + __u32 ac_uid __attribute__((aligned(8))); + /* User ID */ + __u32 ac_gid; /* Group ID */ + __u32 ac_pid; /* Process ID */ + __u32 ac_ppid; /* Parent process ID */ + /* __u32 range means times from 1970 to 2106 */ + __u32 ac_btime; /* Begin time [sec since 1970] */ + __u64 ac_etime __attribute__((aligned(8))); + /* Elapsed time [usec] */ + __u64 ac_utime; /* User CPU time [usec] */ + __u64 ac_stime; /* SYstem CPU time [usec] */ + __u64 ac_minflt; /* Minor Page Fault Count */ + __u64 ac_majflt; /* Major Page Fault Count */ + /* Basic Accounting Fields end */ + + /* Extended accounting fields start */ + /* Accumulated RSS usage in duration of a task, in MBytes-usecs. + * The current rss usage is added to this counter every time + * a tick is charged to a task's system time. So, at the end we + * will have memory usage multiplied by system time. Thus an + * average usage per system time unit can be calculated. + */ + __u64 coremem; /* accumulated RSS usage in MB-usec */ + /* Accumulated virtual memory usage in duration of a task. + * Same as acct_rss_mem1 above except that we keep track of VM usage. + */ + __u64 virtmem; /* accumulated VM usage in MB-usec */ + + /* High watermark of RSS and virtual memory usage in duration of + * a task, in KBytes. + */ + __u64 hiwater_rss; /* High-watermark of RSS usage, in KB */ + __u64 hiwater_vm; /* High-water VM usage, in KB */ + + /* The following four fields are I/O statistics of a task. */ + __u64 read_char; /* bytes read */ + __u64 write_char; /* bytes written */ + __u64 read_syscalls; /* read syscalls */ + __u64 write_syscalls; /* write syscalls */ + /* Extended accounting fields end */ + +#define TASKSTATS_HAS_IO_ACCOUNTING + /* Per-task storage I/O accounting starts */ + __u64 read_bytes; /* bytes of read I/O */ + __u64 write_bytes; /* bytes of write I/O */ + __u64 cancelled_write_bytes; /* bytes of cancelled write I/O */ + + __u64 nvcsw; /* voluntary_ctxt_switches */ + __u64 nivcsw; /* nonvoluntary_ctxt_switches */ + + /* time accounting for SMT machines */ + __u64 ac_utimescaled; /* utime scaled on frequency etc */ + __u64 ac_stimescaled; /* stime scaled on frequency etc */ + __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */ + + /* Delay waiting for memory reclaim */ + __u64 freepages_count; + __u64 freepages_delay_total; + + + /* Delay waiting for thrashing page */ + __u64 thrashing_count; + __u64 thrashing_delay_total; + + /* v10: 64-bit btime to avoid overflow */ + __u64 ac_btime64; /* 64-bit begin time */ + + /* v11: Delay waiting for memory compact */ + __u64 compact_count; + __u64 compact_delay_total; + + /* v12 begin */ + __u32 ac_tgid; /* thread group ID */ + /* Thread group walltime up to now. This is total process walltime if + * AGROUP flag is set. + */ + __u64 ac_tgetime __attribute__((aligned(8))); + /* Lightweight information to identify process binary files. + * This leaves userspace to match this to a file system path, using + * MAJOR() and MINOR() macros to identify a device and mount point, + * the inode to identify the executable file. This is /proc/self/exe + * at the end, so matching the most recent exec(). Values are zero + * for kernel threads. + */ + __u64 ac_exe_dev; /* program binary device ID */ + __u64 ac_exe_inode; /* program binary inode number */ + /* v12 end */ + + /* v13: Delay waiting for write-protect copy */ + __u64 wpcopy_count; + __u64 wpcopy_delay_total; + + /* v14: Delay waiting for IRQ/SOFTIRQ */ + __u64 irq_count; + __u64 irq_delay_total; + + /* v15: add Delay max and Delay min */ + + /* v16: move Delay max and Delay min to the end of taskstat */ + __u64 cpu_delay_max; + __u64 cpu_delay_min; + + __u64 blkio_delay_max; + __u64 blkio_delay_min; + + __u64 swapin_delay_max; + __u64 swapin_delay_min; + + __u64 freepages_delay_max; + __u64 freepages_delay_min; + + __u64 thrashing_delay_max; + __u64 thrashing_delay_min; + + __u64 compact_delay_max; + __u64 compact_delay_min; + + __u64 wpcopy_delay_max; + __u64 wpcopy_delay_min; + + __u64 irq_delay_max; + __u64 irq_delay_min; + + /*v17: delay max timestamp record*/ + struct __kernel_timespec cpu_delay_max_ts; + struct __kernel_timespec blkio_delay_max_ts; + struct __kernel_timespec swapin_delay_max_ts; + struct __kernel_timespec freepages_delay_max_ts; + struct __kernel_timespec thrashing_delay_max_ts; + struct __kernel_timespec compact_delay_max_ts; + struct __kernel_timespec wpcopy_delay_max_ts; + struct __kernel_timespec irq_delay_max_ts; +}; + + +/* + * Commands sent from userspace + * Not versioned. New commands should only be inserted at the enum's end + * prior to __TASKSTATS_CMD_MAX + */ + +enum { + TASKSTATS_CMD_UNSPEC = 0, /* Reserved */ + TASKSTATS_CMD_GET, /* user->kernel request/get-response */ + TASKSTATS_CMD_NEW, /* kernel->user event */ + __TASKSTATS_CMD_MAX, +}; + +#define TASKSTATS_CMD_MAX (__TASKSTATS_CMD_MAX - 1) + +enum { + TASKSTATS_TYPE_UNSPEC = 0, /* Reserved */ + TASKSTATS_TYPE_PID, /* Process id */ + TASKSTATS_TYPE_TGID, /* Thread group id */ + TASKSTATS_TYPE_STATS, /* taskstats structure */ + TASKSTATS_TYPE_AGGR_PID, /* contains pid + stats */ + TASKSTATS_TYPE_AGGR_TGID, /* contains tgid + stats */ + TASKSTATS_TYPE_NULL, /* contains nothing */ + __TASKSTATS_TYPE_MAX, +}; + +#define TASKSTATS_TYPE_MAX (__TASKSTATS_TYPE_MAX - 1) + +enum { + TASKSTATS_CMD_ATTR_UNSPEC = 0, + TASKSTATS_CMD_ATTR_PID, + TASKSTATS_CMD_ATTR_TGID, + TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, + TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, + __TASKSTATS_CMD_ATTR_MAX, +}; + +#define TASKSTATS_CMD_ATTR_MAX (__TASKSTATS_CMD_ATTR_MAX - 1) + +/* NETLINK_GENERIC related info */ + +#define TASKSTATS_GENL_NAME "TASKSTATS" +#define TASKSTATS_GENL_VERSION 0x1 + +#endif /* _LINUX_TASKSTATS_H */ diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h index 91fa51a9c31d..85aa327245c6 100644 --- a/tools/include/uapi/linux/types.h +++ b/tools/include/uapi/linux/types.h @@ -4,6 +4,8 @@ #include <asm-generic/int-ll64.h> +#ifndef __ASSEMBLER__ + /* copied from linux:include/uapi/linux/types.h */ #define __bitwise typedef __u16 __bitwise __le16; @@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum; #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_LINUX_TYPES_H */ diff --git a/tools/include/vdso/unaligned.h b/tools/include/vdso/unaligned.h index eee3d2a4dbe4..9076483c9fbb 100644 --- a/tools/include/vdso/unaligned.h +++ b/tools/include/vdso/unaligned.h @@ -2,14 +2,43 @@ #ifndef __VDSO_UNALIGNED_H #define __VDSO_UNALIGNED_H -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ +#include <linux/compiler_types.h> + +/** + * __get_unaligned_t - read an unaligned value from memory. + * @type: the type to load from the pointer. + * @ptr: the pointer to load from. + * + * Use memcpy to affect an unaligned type sized load avoiding undefined behavior + * from approaches like type punning that require -fno-strict-aliasing in order + * to be correct. As type may be const, use __unqual_scalar_typeof to map to a + * non-const type - you can't memcpy into a const type. The + * __get_unaligned_ctrl_type gives __unqual_scalar_typeof its required + * expression rather than type, a pointer is used to avoid warnings about mixing + * the use of 0 and NULL. The void* cast silences ubsan warnings. + */ +#define __get_unaligned_t(type, ptr) ({ \ + type *__get_unaligned_ctrl_type __always_unused = NULL; \ + __unqual_scalar_typeof(*__get_unaligned_ctrl_type) __get_unaligned_val; \ + __builtin_memcpy(&__get_unaligned_val, (void *)(ptr), \ + sizeof(__get_unaligned_val)); \ + __get_unaligned_val; \ }) -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ +/** + * __put_unaligned_t - write an unaligned value to memory. + * @type: the type of the value to store. + * @val: the value to store. + * @ptr: the pointer to store to. + * + * Use memcpy to affect an unaligned type sized store avoiding undefined + * behavior from approaches like type punning that require -fno-strict-aliasing + * in order to be correct. The void* cast silences ubsan warnings. + */ +#define __put_unaligned_t(type, val, ptr) do { \ + type __put_unaligned_val = (val); \ + __builtin_memcpy((void *)(ptr), &__put_unaligned_val, \ + sizeof(__put_unaligned_val)); \ } while (0) #endif /* __VDSO_UNALIGNED_H */ |
