#ifndef ASM_X86__PDA_H
#define ASM_X86__PDA_H

#ifndef __ASSEMBLY__
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/cache.h>
#include <asm/page.h>

/*
 * Per-processor data structure.  %gs points to it while the kernel runs.
 *
 * The number at the start of a field comment is that field's byte offset
 * from the start of the structure.  Every field after the optional stack
 * canary sits 8 bytes further in when CONFIG_CC_STACKPROTECTOR is enabled,
 * so do not hard-code offsets for those fields.
 */
struct x8664_pda {
	struct task_struct *pcurrent;	/* 0  Current process */
	unsigned long data_offset;	/* 8  Per-cpu data offset from linker
					   address */
	unsigned long kernelstack;	/* 16 top of kernel stack for current */
	unsigned long oldrsp;		/* 24 user rsp for system call */
	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
	unsigned int cpunumber;		/* 36 Logical CPU number */
#ifdef CONFIG_CC_STACKPROTECTOR
	unsigned long stack_canary;	/* 40 stack canary value */
					/* gcc-ABI: this canary MUST be at
					   offset 40!!! */
#endif
	char *irqstackptr;		/* presumably this CPU's interrupt
					   stack pointer -- confirm with users */
	short nodenumber;		/* number of current node (32k max) */
	short in_bootmem;		/* pda lives in bootmem */
	unsigned int __softirq_pending;	/* presumably the pending-softirq
					   mask -- confirm with users */
	unsigned int __nmi_count;	/* number of NMIs on this CPU */
	short mmu_state;		/* NOTE(review): meaning of the values
					   is not visible here -- confirm */
	short isidle;			/* presumably nonzero while this CPU
					   is idle -- confirm with users */
	struct mm_struct *active_mm;	/* presumably the mm currently in use
					   on this CPU -- confirm with users */
	/*
	 * Per-CPU counters; judging by the names, one per interrupt source.
	 * NOTE(review): where each is incremented is not visible here.
	 */
	unsigned apic_timer_irqs;
	unsigned irq0_irqs;
	unsigned irq_resched_count;
	unsigned irq_call_count;
	unsigned irq_tlb_count;
	unsigned irq_thermal_count;
	unsigned irq_threshold_count;
	unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp;

/* Table of PDA pointers; cpu_pda() below indexes it. */
extern struct x8664_pda **_cpu_pda;
/* NOTE(review): the int argument is presumably a CPU number -- confirm. */
extern void pda_init(int);

/* Pointer to the PDA stored in slot i of _cpu_pda (no bounds checking). */
#define cpu_pda(i) (_cpu_pda[i])

/*
 * There is no fast way to get the base address of the PDA, all the accesses
 * have to mention %fs/%gs.  So it needs to be done this Torvaldian way.
 *
 * __bad_pda_field() is what the accessor macros below call when a field's
 * size is not 2, 4 or 8 bytes.  It is declared noreturn.
 * NOTE(review): presumably left undefined on purpose so that such an access
 * fails at link time -- confirm.
 */
extern void __bad_pda_field(void) __attribute__((noreturn));

/*
 * _proxy_pda doesn't actually exist, but tell gcc it is accessed for
 * all PDA accesses so it gets read/write dependencies right.
 *
 * The accessor macros below name _proxy_pda.field only in "m"/"+m" asm
 * operands and in typeof()/sizeof(); the instructions themselves address
 * the field as %gs:offset.
 */
extern struct x8664_pda _proxy_pda;

/* Byte offset of a field within struct x8664_pda (a compile-time constant). */
#define pda_offset(field) offsetof(struct x8664_pda, field)

/*
 * pda_to_op - apply an instruction with a memory destination to a PDA field
 * @op:    instruction mnemonic as a string literal, without size suffix
 *         (e.g. "mov", "add"); the w/l/q suffix is appended here according
 *         to the size of the field
 * @field: member name of struct x8664_pda
 * @val:   source operand; must be assignable to the field's type (checked
 *         at compile time by the dead "if (0)" assignment) and is cast to
 *         that type before being handed to the asm
 *
 * The field is addressed as %gs:offset.  _proxy_pda.field is listed as a
 * "+m" operand only so that gcc sees the memory dependency.  Fields whose
 * size is not 2, 4 or 8 bytes end up calling __bad_pda_field().
 *
 * @val is fully parenthesised before the cast so that compound arguments
 * such as "a + b" or "c ? x : y" are converted as a whole.  The 8-byte case
 * uses the "e" constraint instead of "i" because x86-64 instructions with a
 * memory destination only accept a 32-bit sign-extended immediate; wider
 * constants have to go through a register.
 */
#define pda_to_op(op, field, val)					\
do {									\
	typedef typeof(_proxy_pda.field) T__;				\
	if (0) { T__ tmp__; tmp__ = (val); }	/* type checking */	\
	switch (sizeof(_proxy_pda.field)) {				\
	case 2:								\
		asm(op "w %1,%%gs:%c2" :				\
		    "+m" (_proxy_pda.field) :				\
		    "ri" ((T__)(val)),					\
		    "i" (pda_offset(field)));				\
		break;							\
	case 4:								\
		asm(op "l %1,%%gs:%c2" :				\
		    "+m" (_proxy_pda.field) :				\
		    "ri" ((T__)(val)),					\
		    "i" (pda_offset(field)));				\
		break;							\
	case 8:								\
		asm(op "q %1,%%gs:%c2" :				\
		    "+m" (_proxy_pda.field) :				\
		    "re" ((T__)(val)),					\
		    "i" (pda_offset(field)));				\
		break;							\
	default:							\
		__bad_pda_field();					\
	}								\
} while (0)

/*
 * pda_from_op - load a PDA field into a register with the given instruction
 * @op:    instruction mnemonic as a string literal, without size suffix
 *         (e.g. "mov"); the w/l/q suffix is appended here according to the
 *         size of the field
 * @field: member name of struct x8664_pda
 *
 * Statement expression whose value has the type of the field.  The field is
 * read as %gs:offset; _proxy_pda.field is passed as an "m" input only so
 * that gcc sees the memory dependency.  Fields whose size is not 2, 4 or 8
 * bytes end up calling __bad_pda_field().
 */
#define pda_from_op(op, field)			\
({						\
	typeof(_proxy_pda.field) ret__;		\
	switch (sizeof(_proxy_pda.field)) {	\
	case 2:					\
		asm(op "w %%gs:%c1,%0" :	\
		    "=r" (ret__) :		\
		    "i" (pda_offset(field)),	\
		    "m" (_proxy_pda.field));	\
		break;				\
	case 4:					\
		asm(op "l %%gs:%c1,%0":		\
		    "=r" (ret__):		\
		    "i" (pda_offset(field)),	\
		    "m" (_proxy_pda.field));	\
		break;				\
	case 8:					\
		asm(op "q %%gs:%c1,%0":		\
		    "=r" (ret__) :		\
		    "i" (pda_offset(field)),	\
		    "m" (_proxy_pda.field));	\
		break;				\
	default:				\
		__bad_pda_field();		\
	}					\
	ret__;					\
})

/*
 * Accessors for fields of the PDA that %gs points to.  read_pda() evaluates
 * to the field's value; the others are statements that store to, add to,
 * subtract from, or OR into the field with a single instruction.
 */
#define read_pda(field)		pda_from_op("mov", field)
#define write_pda(field, val)	pda_to_op("mov", field, val)
#define add_pda(field, val)	pda_to_op("add", field, val)
#define sub_pda(field, val)	pda_to_op("sub", field, val)
#define or_pda(field, val)	pda_to_op("or", field, val)

/*
 * Clear bit number 'bit' in a PDA field and report whether it was set.
 *
 * btr copies the old bit into the carry flag and clears it in memory; the
 * following "sbbl %0,%0" turns the carry into the result, so the expression
 * evaluates to 0 if the bit was clear and -1 if it was set.
 *
 * This is not atomic against other CPUs -- CPU preemption needs to be off.
 */
#define test_and_clear_bit_pda(bit, field)				\
({									\
	int old__;							\
	asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0"			\
		     : "=r" (old__), "+m" (_proxy_pda.field)		\
		     : "dIr" (bit), "i" (pda_offset(field)) : "memory");\
	old__;								\
})

#endif

/*
 * 40 bytes, written as five 8-byte slots.  Defined outside the
 * !__ASSEMBLY__ section, so it is also visible to assembly sources.
 * NOTE(review): what the five slots stand for is not visible in this
 * header -- confirm against the users of this constant.
 */
#define PDA_STACKOFFSET (5*8)

#endif /* ASM_X86__PDA_H */