diff options
236 files changed, 5779 insertions, 1399 deletions
diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt new file mode 100644 index 000000000000..c2321903aa09 --- /dev/null +++ b/Documentation/Intel-IOMMU.txt @@ -0,0 +1,115 @@ +Linux IOMMU Support +=================== + +The architecture spec can be obtained from the below location. + +http://www.intel.com/technology/virtualization/ + +This guide gives a quick cheat sheet for some basic understanding. + +Some Keywords + +DMAR - DMA remapping +DRHD - DMA Engine Reporting Structure +RMRR - Reserved memory Region Reporting Structure +ZLR - Zero length reads from PCI devices +IOVA - IO Virtual address. + +Basic stuff +----------- + +ACPI enumerates and lists the different DMA engines in the platform, and +device scope relationships between PCI devices and which DMA engine controls +them. + +What is RMRR? +------------- + +There are some devices the BIOS controls, for e.g USB devices to perform +PS2 emulation. The regions of memory used for these devices are marked +reserved in the e820 map. When we turn on DMA translation, DMA to those +regions will fail. Hence BIOS uses RMRR to specify these regions along with +devices that need to access these regions. OS is expected to setup +unity mappings for these regions for these devices to access these regions. + +How is IOVA generated? +--------------------- + +Well behaved drivers call pci_map_*() calls before sending command to device +that needs to perform DMA. Once DMA is completed and mapping is no longer +required, device performs a pci_unmap_*() calls to unmap the region. + +The Intel IOMMU driver allocates a virtual address per domain. Each PCIE +device has its own domain (hence protection). Devices under p2p bridges +share the virtual address with all devices under the p2p bridge due to +transaction id aliasing for p2p bridges. + +IOVA generation is pretty generic. We used the same technique as vmalloc() +but these are not global address spaces, but separate for each domain. +Different DMA engines may support different number of domains. + +We also allocate gaurd pages with each mapping, so we can attempt to catch +any overflow that might happen. + + +Graphics Problems? +------------------ +If you encounter issues with graphics devices, you can try adding +option intel_iommu=igfx_off to turn off the integrated graphics engine. + +If it happens to be a PCI device included in the INCLUDE_ALL Engine, +then try enabling CONFIG_DMAR_GFX_WA to setup a 1-1 map. We hear +graphics drivers may be in process of using DMA api's in the near +future and at that time this option can be yanked out. + +Some exceptions to IOVA +----------------------- +Interrupt ranges are not address translated, (0xfee00000 - 0xfeefffff). +The same is true for peer to peer transactions. Hence we reserve the +address from PCI MMIO ranges so they are not allocated for IOVA addresses. + + +Fault reporting +--------------- +When errors are reported, the DMA engine signals via an interrupt. The fault +reason and device that caused it with fault reason is printed on console. + +See below for sample. + + +Boot Message Sample +------------------- + +Something like this gets printed indicating presence of DMAR tables +in ACPI. + +ACPI: DMAR (v001 A M I OEMDMAR 0x00000001 MSFT 0x00000097) @ 0x000000007f5b5ef0 + +When DMAR is being processed and initialized by ACPI, prints DMAR locations +and any RMRR's processed. + +ACPI DMAR:Host address width 36 +ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000 +ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000 +ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000 +ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff +ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff + +When DMAR is enabled for use, you will notice.. + +PCI-DMA: Using DMAR IOMMU + +Fault reporting +--------------- + +DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000 +DMAR:[fault reason 05] PTE Write access is not set +DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000 +DMAR:[fault reason 05] PTE Write access is not set + +TBD +---- + +- For compatibility testing, could use unity map domain for all devices, just + provide a 1-1 for all useful memory under a single domain for all devices. +- API for paravirt ops for abstracting functionlity for VMM folks. diff --git a/Documentation/filesystems/Exporting b/Documentation/filesystems/Exporting index 31047e0fe14b..87019d2b5981 100644 --- a/Documentation/filesystems/Exporting +++ b/Documentation/filesystems/Exporting @@ -2,9 +2,12 @@ Making Filesystems Exportable ============================= -Most filesystem operations require a dentry (or two) as a starting +Overview +-------- + +All filesystem operations require a dentry (or two) as a starting point. Local applications have a reference-counted hold on suitable -dentrys via open file descriptors or cwd/root. However remote +dentries via open file descriptors or cwd/root. However remote applications that access a filesystem via a remote filesystem protocol such as NFS may not be able to hold such a reference, and so need a different way to refer to a particular dentry. As the alternative @@ -13,14 +16,14 @@ server-reboot (among other things, though these tend to be the most problematic), there is no simple answer like 'filename'. The mechanism discussed here allows each filesystem implementation to -specify how to generate an opaque (out side of the filesystem) byte +specify how to generate an opaque (outside of the filesystem) byte string for any dentry, and how to find an appropriate dentry for any given opaque byte string. This byte string will be called a "filehandle fragment" as it corresponds to part of an NFS filehandle. A filesystem which supports the mapping between filehandle fragments -and dentrys will be termed "exportable". +and dentries will be termed "exportable". @@ -89,11 +92,9 @@ For a filesystem to be exportable it must: 1/ provide the filehandle fragment routines described below. 2/ make sure that d_splice_alias is used rather than d_add when ->lookup finds an inode for a given parent and name. - Typically the ->lookup routine will end: - if (inode) - return d_splice(inode, dentry); - d_add(dentry, inode); - return NULL; + Typically the ->lookup routine will end with a: + + return d_splice_alias(inode, dentry); } @@ -101,67 +102,39 @@ For a filesystem to be exportable it must: A file system implementation declares that instances of the filesystem are exportable by setting the s_export_op field in the struct super_block. This field must point to a "struct export_operations" -struct which could potentially be full of NULLs, though normally at -least get_parent will be set. - - The primary operations are decode_fh and encode_fh. -decode_fh takes a filehandle fragment and tries to find or create a -dentry for the object referred to by the filehandle. -encode_fh takes a dentry and creates a filehandle fragment which can -later be used to find/create a dentry for the same object. - -decode_fh will probably make use of "find_exported_dentry". -This function lives in the "exportfs" module which a filesystem does -not need unless it is being exported. So rather that calling -find_exported_dentry directly, each filesystem should call it through -the find_exported_dentry pointer in it's export_operations table. -This field is set correctly by the exporting agent (e.g. nfsd) when a -filesystem is exported, and before any export operations are called. - -find_exported_dentry needs three support functions from the -filesystem: - get_name. When given a parent dentry and a child dentry, this - should find a name in the directory identified by the parent - dentry, which leads to the object identified by the child dentry. - If no get_name function is supplied, a default implementation is - provided which uses vfs_readdir to find potential names, and - matches inode numbers to find the correct match. - - get_parent. When given a dentry for a directory, this should return - a dentry for the parent. Quite possibly the parent dentry will - have been allocated by d_alloc_anon. - The default get_parent function just returns an error so any - filehandle lookup that requires finding a parent will fail. - ->lookup("..") is *not* used as a default as it can leave ".." - entries in the dcache which are too messy to work with. - - get_dentry. When given an opaque datum, this should find the - implied object and create a dentry for it (possibly with - d_alloc_anon). - The opaque datum is whatever is passed down by the decode_fh - function, and is often simply a fragment of the filehandle - fragment. - decode_fh passes two datums through find_exported_dentry. One that - should be used to identify the target object, and one that can be - used to identify the object's parent, should that be necessary. - The default get_dentry function assumes that the datum contains an - inode number and a generation number, and it attempts to get the - inode using "iget" and check it's validity by matching the - generation number. A filesystem should only depend on the default - if iget can safely be used this way. - -If decode_fh and/or encode_fh are left as NULL, then default -implementations are used. These defaults are suitable for ext2 and -extremely similar filesystems (like ext3). - -The default encode_fh creates a filehandle fragment from the inode -number and generation number of the target together with the inode -number and generation number of the parent (if the parent is -required). - -The default decode_fh extract the target and parent datums from the -filehandle assuming the format used by the default encode_fh and -passed them to find_exported_dentry. +struct which has the following members: + + encode_fh (optional) + Takes a dentry and creates a filehandle fragment which can later be used + to find or create a dentry for the same object. The default + implementation creates a filehandle fragment that encodes a 32bit inode + and generation number for the inode encoded, and if necessary the + same information for the parent. + + fh_to_dentry (mandatory) + Given a filehandle fragment, this should find the implied object and + create a dentry for it (possibly with d_alloc_anon). + + fh_to_parent (optional but strongly recommended) + Given a filehandle fragment, this should find the parent of the + implied object and create a dentry for it (possibly with d_alloc_anon). + May fail if the filehandle fragment is too small. + + get_parent (optional but strongly recommended) + When given a dentry for a directory, this should return a dentry for + the parent. Quite possibly the parent dentry will have been allocated + by d_alloc_anon. The default get_parent function just returns an error + so any filehandle lookup that requires finding a parent will fail. + ->lookup("..") is *not* used as a default as it can leave ".." entries + in the dcache which are too messy to work with. + + get_name (optional) + When given a parent dentry and a child dentry, this should find a name + in the directory identified by the parent dentry, which leads to the + object identified by the child dentry. If no get_name function is + supplied, a default implementation is provided which uses vfs_readdir + to find potential names, and matches inode numbers to find the correct + match. A filehandle fragment consists of an array of 1 or more 4byte words, @@ -172,5 +145,3 @@ generated by encode_fh, in which case it will have been padded with nuls. Rather, the encode_fh routine should choose a "type" which indicates the decode_fh how much of the filehandle is valid, and how it should be interpreted. - - diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index 35985b34d5a6..2f75e750e4f5 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt @@ -168,6 +168,8 @@ Offset Proto Name Meaning 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not 0235/3 N/A pad2 Unused 0238/4 2.06+ cmdline_size Maximum size of the kernel command line +023C/4 2.07+ hardware_subarch Hardware subarchitecture +0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data (1) For backwards compatibility, if the setup_sects field contains 0, the real value is 4. @@ -204,7 +206,7 @@ boot loaders can ignore those fields. The byte order of all fields is littleendian (this is x86, after all.) -Field name: setup_secs +Field name: setup_sects Type: read Offset/size: 0x1f1/1 Protocol: ALL @@ -356,6 +358,13 @@ Protocol: 2.00+ - If 0, the protected-mode code is loaded at 0x10000. - If 1, the protected-mode code is loaded at 0x100000. + Bit 6 (write): KEEP_SEGMENTS + Protocol: 2.07+ + - if 0, reload the segment registers in the 32bit entry point. + - if 1, do not reload the segment registers in the 32bit entry point. + Assume that %cs %ds %ss %es are all set to flat segments with + a base of 0 (or the equivalent for their environment). + Bit 7 (write): CAN_USE_HEAP Set this bit to 1 to indicate that the value entered in the heap_end_ptr is valid. If this field is clear, some setup code @@ -480,6 +489,29 @@ Protocol: 2.06+ cmdline_size characters. With protocol version 2.05 and earlier, the maximum size was 255. +Field name: hardware_subarch +Type: write +Offset/size: 0x23c/4 +Protocol: 2.07+ + + In a paravirtualized environment the hardware low level architectural + pieces such as interrupt handling, page table handling, and + accessing process control registers needs to be done differently. + + This field allows the bootloader to inform the kernel we are in one + one of those environments. + + 0x00000000 The default x86/PC environment + 0x00000001 lguest + 0x00000002 Xen + +Field name: hardware_subarch_data +Type: write +Offset/size: 0x240/8 +Protocol: 2.07+ + + A pointer to data that is specific to hardware subarch + **** THE KERNEL COMMAND LINE diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 6166e2d7da76..7a7753321a26 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -519,17 +519,17 @@ more details, with real examples. to the user why it stops. cc-cross-prefix - cc-cross-prefix is used to check if there exist a $(CC) in path with + cc-cross-prefix is used to check if there exists a $(CC) in path with one of the listed prefixes. The first prefix where there exist a prefix$(CC) in the PATH is returned - and if no prefix$(CC) is found then nothing is returned. Additional prefixes are separated by a single space in the call of cc-cross-prefix. - This functionality is usefull for architecture Makefile that try - to set CROSS_COMPILE to well know values but may have several + This functionality is useful for architecture Makefiles that try + to set CROSS_COMPILE to well-known values but may have several values to select between. - It is recommended only to try to set CROSS_COMPILE is it is a cross - build (host arch is different from target arch). And is CROSS_COMPILE + It is recommended only to try to set CROSS_COMPILE if it is a cross + build (host arch is different from target arch). And if CROSS_COMPILE is already set then leave it with the old value. Example: diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6accd360da73..b2361667839f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -772,6 +772,23 @@ and is between 256 and 4096 characters. It is defined in the file inttest= [IA64] + intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option + off + Disable intel iommu driver. + igfx_off [Default Off] + By default, gfx is mapped as normal device. If a gfx + device has a dedicated DMAR unit, the DMAR unit is + bypassed by not enabling DMAR with this option. In + this case, gfx device will use physical address for + DMA. + forcedac [x86_64] + With this option iommu will not optimize to look + for io virtual address below 32 bit forcing dual + address cycle on pci bus for cards supporting greater + than 32 bit addressing. The default is to look + for translation below 32 bit and if not available + then look in the higher range. + io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in arch/alpha/kernel/core_marvel.c. diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 5fbcc22c98e9..168117bd6ee8 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt @@ -2,7 +2,8 @@ Memory Hotplug ============== -Last Updated: Jul 28 2007 +Created: Jul 28 2007 +Add description of notifier of memory hotplug Oct 11 2007 This document is about memory hotplug including how-to-use and current status. Because Memory Hotplug is still under development, contents of this text will @@ -24,7 +25,8 @@ be changed often. 6.1 Memory offline and ZONE_MOVABLE 6.2. How to offline memory 7. Physical memory remove -8. Future Work List +8. Memory hotplug event notifier +9. Future Work List Note(1): x86_64's has special implementation for memory hotplug. This text does not describe it. @@ -307,8 +309,58 @@ Need more implementation yet.... - Notification completion of remove works by OS to firmware. - Guard from remove if not yet. +-------------------------------- +8. Memory hotplug event notifier +-------------------------------- +Memory hotplug has event notifer. There are 6 types of notification. + +MEMORY_GOING_ONLINE + Generated before new memory becomes available in order to be able to + prepare subsystems to handle memory. The page allocator is still unable + to allocate from the new memory. + +MEMORY_CANCEL_ONLINE + Generated if MEMORY_GOING_ONLINE fails. + +MEMORY_ONLINE + Generated when memory has succesfully brought online. The callback may + allocate pages from the new memory. + +MEMORY_GOING_OFFLINE + Generated to begin the process of offlining memory. Allocations are no + longer possible from the memory but some of the memory to be offlined + is still in use. The callback can be used to free memory known to a + subsystem from the indicated memory section. + +MEMORY_CANCEL_OFFLINE + Generated if MEMORY_GOING_OFFLINE fails. Memory is available again from + the section that we attempted to offline. + +MEMORY_OFFLINE + Generated after offlining memory is complete. + +A callback routine can be registered by + hotplug_memory_notifier(callback_func, priority) + +The second argument of callback function (action) is event types of above. +The third argument is passed by pointer of struct memory_notify. + +struct memory_notify { + unsigned long start_pfn; + unsigned long nr_pages; + int status_cahnge_nid; +} + +start_pfn is start_pfn of online/offline memory. +nr_pages is # of pages of online/offline memory. +status_change_nid is set node id when N_HIGH_MEMORY of nodemask is (will be) +set/clear. It means a new(memoryless) node gets new memory by online and a +node loses all memory. If this is -1, then nodemask status is not changed. +If status_changed_nid >= 0, callback should create/discard structures for the +node if necessary. + -------------- -8. Future Work +9. Future Work -------------- - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like sysctl or new control file. @@ -1505,15 +1505,16 @@ quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files)) # and we build for the host arch quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) cmd_depmod = \ - if [ -r System.map -a -x $(DEPMOD) -a "$(SUBARCH)" = "$(ARCH)" ]; then \ + if [ -r System.map -a -x $(DEPMOD) ]; then \ $(DEPMOD) -ae -F System.map \ $(if $(strip $(INSTALL_MOD_PATH)), -b $(INSTALL_MOD_PATH) -r) \ $(KERNELRELEASE); \ fi # Create temporary dir for module support files -cmd_crmodverdir = $(Q)mkdir -p $(MODVERDIR); rm -f $(MODVERDIR)/* - +# clean it up only when building all modules +cmd_crmodverdir = $(Q)mkdir -p $(MODVERDIR) \ + $(if $(KBUILD_MODULES),; rm -f $(MODVERDIR)/*) a_flags = -Wp,-MD,$(depfile) $(KBUILD_AFLAGS) $(AFLAGS_KERNEL) \ $(NOSTDINC_FLAGS) $(KBUILD_CPPFLAGS) \ diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index e1c470752ebc..ee07dceae1d4 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -465,7 +465,7 @@ EXPORT_SYMBOL(pci_free_consistent); Write dma_length of each leader with the combined lengths of the mergable followers. */ -#define SG_ENT_VIRT_ADDRESS(SG) (page_address((SG)->page) + (SG)->offset) +#define SG_ENT_VIRT_ADDRESS(SG) (sg_virt((SG))) #define SG_ENT_PHYS_ADDRESS(SG) __pa(SG_ENT_VIRT_ADDRESS(SG)) static void diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 44ab0dad4035..9d371e476552 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -442,7 +442,7 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, BUG_ON(dir == DMA_NONE); for (i = 0; i < nents; i++, sg++) { - struct page *page = sg->page; + struct page *page = sg_page(sg); unsigned int offset = sg->offset; unsigned int length = sg->length; void *ptr = page_address(page) + offset; diff --git a/arch/blackfin/kernel/dma-mapping.c b/arch/blackfin/kernel/dma-mapping.c index 94d7b119b71e..a16cb03c5291 100644 --- a/arch/blackfin/kernel/dma-mapping.c +++ b/arch/blackfin/kernel/dma-mapping.c @@ -160,8 +160,7 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, BUG_ON(direction == DMA_NONE); for (i = 0; i < nents; i++, sg++) { - sg->dma_address = (dma_addr_t)(page_address(sg->page) + - sg->offset); + sg->dma_address = (dma_addr_t) sg_virt(sg); invalidate_dcache_range(sg_dma_address(sg), sg_dma_address(sg) + diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 3c95f4184b99..bc859a311eaf 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -246,7 +246,7 @@ static int reserve_sba_gart = 1; static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t); static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t); -#define sba_sg_address(sg) (page_address((sg)->page) + (sg)->offset) +#define sba_sg_address(sg) sg_virt((sg)) #ifdef FULL_VALID_PDIR static u64 prefetch_spill_page; diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c index a3a558a06757..6ef9b5219930 100644 --- a/arch/ia64/hp/sim/simscsi.c +++ b/arch/ia64/hp/sim/simscsi.c @@ -131,7 +131,7 @@ simscsi_sg_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset) stat.fd = desc[sc->device->id]; scsi_for_each_sg(sc, sl, scsi_sg_count(sc), i) { - req.addr = __pa(page_address(sl->page) + sl->offset); + req.addr = __pa(sg_virt(sl)); req.len = sl->length; if (DBG) printk("simscsi_sg_%s @ %lx (off %lx) use_sg=%d len=%d\n", @@ -212,7 +212,7 @@ static void simscsi_fillresult(struct scsi_cmnd *sc, char *buf, unsigned len) if (!len) break; thislen = min(len, slp->length); - memcpy(page_address(slp->page) + slp->offset, buf, thislen); + memcpy(sg_virt(slp), buf, thislen); len -= thislen; } } diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 8e4894b205e2..3f7ea13358e9 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -1090,7 +1090,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e) void efi_initialize_iomem_resources(struct resource *code_resource, - struct resource *data_resource) + struct resource *data_resource, + struct resource *bss_resource) { struct resource *res; void *efi_map_start, *efi_map_end, *p; @@ -1171,6 +1172,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, */ insert_resource(res, code_resource); insert_resource(res, data_resource); + insert_resource(res, bss_resource); #ifdef CONFIG_KEXEC insert_resource(res, &efi_memmap_res); insert_resource(res, &boot_param_res); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index cbf67f1aa291..ae6c3c02e117 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -90,7 +90,12 @@ static struct resource code_resource = { .name = "Kernel code", .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; -extern char _text[], _end[], _etext[]; + +static struct resource bss_resource = { + .name = "Kernel bss", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; +extern char _text[], _end[], _etext[], _edata[], _bss[]; unsigned long ia64_max_cacheline_size; @@ -200,8 +205,11 @@ static int __init register_memory(void) code_resource.start = ia64_tpa(_text); code_resource.end = ia64_tpa(_etext) - 1; data_resource.start = ia64_tpa(_etext); - data_resource.end = ia64_tpa(_end) - 1; - efi_initialize_iomem_resources(&code_resource, &data_resource); + data_resource.end = ia64_tpa(_edata) - 1; + bss_resource.start = ia64_tpa(_bss); + bss_resource.end = ia64_tpa(_end) - 1; + efi_initialize_iomem_resources(&code_resource, &data_resource, + &bss_resource); return 0; } diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index ecd8a52b9b9e..511db2fd7bff 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -16,7 +16,7 @@ #include <asm/sn/pcidev.h> #include <asm/sn/sn_sal.h> -#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) +#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) #define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG)) /** diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index 9d4e4b5b6bd8..ef490e1ce600 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -121,7 +121,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int i; for (i = 0; i < nents; sg++, i++) { - sg->dma_address = page_to_phys(sg->page) + sg->offset; + sg->dma_address = sg_phys(sg); dma_sync_single_for_device(dev, sg->dma_address, sg->length, dir); } return nents; diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 98b5e5bac02e..b0b034c4654f 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -165,12 +165,11 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, for (i = 0; i < nents; i++, sg++) { unsigned long addr; - addr = (unsigned long) page_address(sg->page); + addr = (unsigned long) sg_virt(sg); if (!plat_device_is_coherent(dev) && addr) - __dma_sync(addr + sg->offset, sg->length, direction); + __dma_sync(addr, sg->length, direction); sg->dma_address = plat_map_dma_mem(dev, - (void *)(addr + sg->offset), - sg->length); + (void *)addr, sg->length); } return nents; @@ -223,10 +222,9 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, for (i = 0; i < nhwentries; i++, sg++) { if (!plat_device_is_coherent(dev) && direction != DMA_TO_DEVICE) { - addr = (unsigned long) page_address(sg->page); + addr = (unsigned long) sg_virt(sg); if (addr) - __dma_sync(addr + sg->offset, sg->length, - direction); + __dma_sync(addr, sg->length, direction); } plat_unmap_dma_mem(sg->dma_address); } @@ -304,7 +302,7 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, /* Make sure that gcc doesn't leave the empty loop body. */ for (i = 0; i < nelems; i++, sg++) { if (cpu_is_noncoherent_r10000(dev)) - __dma_sync((unsigned long)page_address(sg->page), + __dma_sync((unsigned long)page_address(sg_page(sg)), sg->length, direction); plat_unmap_dma_mem(sg->dma_address); } @@ -322,7 +320,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nele /* Make sure that gcc doesn't leave the empty loop body. */ for (i = 0; i < nelems; i++, sg++) { if (!plat_device_is_coherent(dev)) - __dma_sync((unsigned long)page_address(sg->page), + __dma_sync((unsigned long)page_address(sg_page(sg)), sg->length, direction); plat_unmap_dma_mem(sg->dma_address); } diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c index 9001104b56b0..14206e3f0819 100644 --- a/arch/powerpc/kernel/dma_64.c +++ b/arch/powerpc/kernel/dma_64.c @@ -161,8 +161,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int i; for_each_sg(sgl, sg, nents, i) { - sg->dma_address = (page_to_phys(sg->page) + sg->offset) | - dma_direct_offset; + sg->dma_address = sg_phys(sg) | dma_direct_offset; sg->dma_length = sg->length; } diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 289d7e935918..72fd87156b24 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -102,8 +102,7 @@ static int ibmebus_map_sg(struct device *dev, int i; for_each_sg(sgl, sg, nents, i) { - sg->dma_address = (dma_addr_t)page_address(sg->page) - + sg->offset; + sg->dma_address = (dma_addr_t) sg_virt(sg); sg->dma_length = sg->length; } diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 306a6f75b6c5..2d0c9ef555e9 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -307,7 +307,7 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist, continue; } /* Allocate iommu entries for that segment */ - vaddr = (unsigned long)page_address(s->page) + s->offset; + vaddr = (unsigned long) sg_virt(s); npages = iommu_num_pages(vaddr, slen); entry = iommu_range_alloc(tbl, npages, &handle, mask >> IOMMU_PAGE_SHIFT, 0); diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 07e64b48e7fc..6405f4a36763 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -628,9 +628,8 @@ static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sgl, int i; for_each_sg(sgl, sg, nents, i) { - int result = ps3_dma_map(dev->d_region, - page_to_phys(sg->page) + sg->offset, sg->length, - &sg->dma_address, 0); + int result = ps3_dma_map(dev->d_region, sg_phys(sg), + sg->length, &sg->dma_address, 0); if (result) { pr_debug("%s:%d: ps3_dma_map failed (%d)\n", diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 9c3ed88853f3..97aa50d1e4ae 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -727,9 +727,8 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, BUG_ON(direction == PCI_DMA_NONE); /* IIep is write-through, not flushing. */ for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg->page) == NULL); - sg->dvma_address = - virt_to_phys(page_address(sg->page)) + sg->offset; + BUG_ON(page_address(sg_page(sg)) == NULL); + sg->dvma_address = virt_to_phys(sg_virt(sg)); sg->dvma_length = sg->length; } return nents; @@ -748,9 +747,9 @@ void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg->page) == NULL); + BUG_ON(page_address(sg_page(sg)) == NULL); mmu_inval_dma_area( - (unsigned long) page_address(sg->page), + (unsigned long) page_address(sg_page(sg)), (sg->length + PAGE_SIZE-1) & PAGE_MASK); } } @@ -798,9 +797,9 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sgl, int BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg->page) == NULL); + BUG_ON(page_address(sg_page(sg)) == NULL); mmu_inval_dma_area( - (unsigned long) page_address(sg->page), + (unsigned long) page_address(sg_page(sg)), (sg->length + PAGE_SIZE-1) & PAGE_MASK); } } @@ -814,9 +813,9 @@ void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sgl, BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { - BUG_ON(page_address(sg->page) == NULL); + BUG_ON(page_address(sg_page(sg)) == NULL); mmu_inval_dma_area( - (unsigned long) page_address(sg->page), + (unsigned long) page_address(sg_page(sg)), (sg->length + PAGE_SIZE-1) & PAGE_MASK); } } diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index 375b4db63704..1666087c5b80 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -144,7 +144,7 @@ static void iounit_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus spin_lock_irqsave(&iounit->lock, flags); while (sz != 0) { --sz; - sg->dvma_address = iounit_get_area(iounit, (unsigned long)page_address(sg->page) + sg->offset, sg->length); + sg->dvma_address = iounit_get_area(iounit, sg_virt(sg), sg->length); sg->dvma_length = sg->length; sg = sg_next(sg); } diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 283656d9f6ea..4b934270f05e 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -238,7 +238,7 @@ static void iommu_get_scsi_sgl_noflush(struct scatterlist *sg, int sz, struct sb while (sz != 0) { --sz; n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; - sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset; + sg->dvma_address = iommu_get_one(sg_page(sg), n, sbus) + sg->offset; sg->dvma_length = (__u32) sg->length; sg = sg_next(sg); } @@ -252,7 +252,7 @@ static void iommu_get_scsi_sgl_gflush(struct scatterlist *sg, int sz, struct sbu while (sz != 0) { --sz; n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; - sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset; + sg->dvma_address = iommu_get_one(sg_page(sg), n, sbus) + sg->offset; sg->dvma_length = (__u32) sg->length; sg = sg_next(sg); } @@ -273,7 +273,7 @@ static void iommu_get_scsi_sgl_pflush(struct scatterlist *sg, int sz, struct sbu * XXX Is this a good assumption? * XXX What if someone else unmaps it here and races us? */ - if ((page = (unsigned long) page_address(sg->page)) != 0) { + if ((page = (unsigned long) page_address(sg_page(sg))) != 0) { for (i = 0; i < n; i++) { if (page != oldpage) { /* Already flushed? */ flush_page_for_dma(page); @@ -283,7 +283,7 @@ static void iommu_get_scsi_sgl_pflush(struct scatterlist *sg, int sz, struct sbu } } - sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset; + sg->dvma_address = iommu_get_one(sg_page(sg), n, sbus) + sg->offset; sg->dvma_length = (__u32) sg->length; sg = sg_next(sg); } diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c index ee6708fc4492..a2cc141291c7 100644 --- a/arch/sparc/mm/sun4c.c +++ b/arch/sparc/mm/sun4c.c @@ -1228,7 +1228,7 @@ static void sun4c_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus * { while (sz != 0) { --sz; - sg->dvma_address = (__u32)sun4c_lockarea(page_address(sg->page) + sg->offset, sg->length); + sg->dvma_address = (__u32)sun4c_lockarea(sg_virt(sg), sg->length); sg->dvma_length = sg->length; sg = sg_next(sg); } diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c index 29af777d7ac9..070a4846c0cb 100644 --- a/arch/sparc64/kernel/iommu.c +++ b/arch/sparc64/kernel/iommu.c @@ -472,8 +472,7 @@ static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr, spin_unlock_irqrestore(&iommu->lock, flags); } -#define SG_ENT_PHYS_ADDRESS(SG) \ - (__pa(page_address((SG)->page)) + (SG)->offset) +#define SG_ENT_PHYS_ADDRESS(SG) (__pa(sg_virt((SG)))) static void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, int nelems, @@ -565,9 +564,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, /* Fast path single entry scatterlists. */ if (nelems == 1) { sglist->dma_address = - dma_4u_map_single(dev, - (page_address(sglist->page) + - sglist->offset), + dma_4u_map_single(dev, sg_virt(sglist), sglist->length, direction); if (unlikely(sglist->dma_address == DMA_ERROR_CODE)) return 0; diff --git a/arch/sparc64/kernel/iommu_common.c b/arch/sparc64/kernel/iommu_common.c index d7ca900ec51d..78e8277df655 100644 --- a/arch/sparc64/kernel/iommu_common.c +++ b/arch/sparc64/kernel/iommu_common.c @@ -73,7 +73,7 @@ static int verify_one_map(struct scatterlist *dma_sg, struct scatterlist **__sg, daddr = dma_sg->dma_address; sglen = sg->length; - sgaddr = (unsigned long) (page_address(sg->page) + sg->offset); + sgaddr = (unsigned long) sg_virt(sg); while (dlen > 0) { unsigned long paddr; @@ -123,7 +123,7 @@ static int verify_one_map(struct scatterlist *dma_sg, struct scatterlist **__sg, sg = sg_next(sg); if (--nents <= 0) break; - sgaddr = (unsigned long) (page_address(sg->page) + sg->offset); + sgaddr = (unsigned long) sg_virt(sg); sglen = sg->length; } if (dlen < 0) { @@ -191,7 +191,7 @@ void verify_sglist(struct scatterlist *sglist, int nents, iopte_t *iopte, int np printk("sg(%d): page_addr(%p) off(%x) length(%x) " "dma_address[%016x] dma_length[%016x]\n", i, - page_address(sg->page), sg->offset, + page_address(sg_page(sg)), sg->offset, sg->length, sg->dma_address, sg->dma_length); } @@ -207,15 +207,14 @@ unsigned long prepare_sg(struct scatterlist *sg, int nents) unsigned long prev; u32 dent_addr, dent_len; - prev = (unsigned long) (page_address(sg->page) + sg->offset); + prev = (unsigned long) sg_virt(sg); prev += (unsigned long) (dent_len = sg->length); - dent_addr = (u32) ((unsigned long)(page_address(sg->page) + sg->offset) - & (IO_PAGE_SIZE - 1UL)); + dent_addr = (u32) ((unsigned long)(sg_virt(sg)) & (IO_PAGE_SIZE - 1UL)); while (--nents) { unsigned long addr; sg = sg_next(sg); - addr = (unsigned long) (page_address(sg->page) + sg->offset); + addr = (unsigned long) sg_virt(sg); if (! VCONTIG(prev, addr)) { dma_sg->dma_address = dent_addr; dma_sg->dma_length = dent_len; diff --git a/arch/sparc64/kernel/ldc.c b/arch/sparc64/kernel/ldc.c index 85a2be0b0962..c8313cb60f0a 100644 --- a/arch/sparc64/kernel/ldc.c +++ b/arch/sparc64/kernel/ldc.c @@ -2057,7 +2057,7 @@ static void fill_cookies(struct cookie_state *sp, unsigned long pa, static int sg_count_one(struct scatterlist *sg) { - unsigned long base = page_to_pfn(sg->page) << PAGE_SHIFT; + unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT; long len = sg->length; if ((sg->offset | len) & (8UL - 1)) diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c index fe46ace3e59f..8c4875bdb4a8 100644 --- a/arch/sparc64/kernel/pci_sun4v.c +++ b/arch/sparc64/kernel/pci_sun4v.c @@ -365,8 +365,7 @@ static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr, spin_unlock_irqrestore(&iommu->lock, flags); } -#define SG_ENT_PHYS_ADDRESS(SG) \ - (__pa(page_address((SG)->page)) + (SG)->offset) +#define SG_ENT_PHYS_ADDRESS(SG) (__pa(sg_virt((SG)))) static long fill_sg(long entry, struct device *dev, struct scatterlist *sg, @@ -477,9 +476,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, /* Fast path single entry scatterlists. */ if (nelems == 1) { sglist->dma_address = - dma_4v_map_single(dev, - (page_address(sglist->page) + - sglist->offset), + dma_4v_map_single(dev, sg_virt(sglist), sglist->length, direction); if (unlikely(sglist->dma_address == DMA_ERROR_CODE)) return 0; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 25b248a02507..3a8cd3dfb51c 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1115,7 +1115,7 @@ static void do_ubd_request(struct request_queue *q) } prepare_request(req, io_req, (unsigned long long) req->sector << 9, - sg->offset, sg->length, sg->page); + sg->offset, sg->length, sg_page(sg)); last_sectors = sg->length >> 9; n = os_write_file(thread_fd, &io_req, diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index f35ea2237522..a0ae2e7f6cec 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -27,13 +27,22 @@ #include <asm/segment.h> #include <asm/page.h> #include <asm/boot.h> +#include <asm/asm-offsets.h> .section ".text.head","ax",@progbits .globl startup_32 startup_32: - cld - cli + /* check to see if KEEP_SEGMENTS flag is meaningful */ + cmpw $0x207, BP_version(%esi) + jb 1f + + /* test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments */ + testb $(1<<6), BP_loadflags(%esi) + jnz 2f + +1: cli movl $(__BOOT_DS),%eax movl %eax,%ds movl %eax,%es @@ -41,6 +50,8 @@ startup_32: movl %eax,%gs movl %eax,%ss +2: cld + /* Calculate the delta between where we were compiled to run * at and where we were actually loaded at. This can only be done * with a short local call on x86. Nothing else will tell us what diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c index 1dc1e19c0a9f..b74d60d1b2fa 100644 --- a/arch/x86/boot/compressed/misc_32.c +++ b/arch/x86/boot/compressed/misc_32.c @@ -247,6 +247,9 @@ static void putstr(const char *s) int x,y,pos; char c; + if (RM_SCREEN_INFO.orig_video_mode == 0 && lines == 0 && cols == 0) + return; + x = RM_SCREEN_INFO.orig_x; y = RM_SCREEN_INFO.orig_y; diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index f3140e596d40..8353c81c41c0 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -119,7 +119,7 @@ _start: # Part 2 of the header, from the old setup.S .ascii "HdrS" # header signature - .word 0x0206 # header version number (>= 0x0105) + .word 0x0207 # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) .globl realmode_swtch realmode_swtch: .word 0, 0 # default_switch, SETUPSEG @@ -214,6 +214,11 @@ cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, #added with boot protocol #version 2.06 +hardware_subarch: .long 0 # subarchitecture, added with 2.07 + # default to 0 for normal x86 PC + +hardware_subarch_data: .quad 0 + # End of setup header ##################################################### .section ".inittext", "ax" diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index f1b7cdda82b3..f8764716b0c0 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -15,6 +15,7 @@ #include <asm/fixmap.h> #include <asm/processor.h> #include <asm/thread_info.h> +#include <asm/bootparam.h> #include <asm/elf.h> #include <xen/interface/xen.h> @@ -146,4 +147,10 @@ void foo(void) OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); #endif + + BLANK(); + OFFSET(BP_scratch, boot_params, scratch); + OFFSET(BP_loadflags, boot_params, hdr.loadflags); + OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); + OFFSET(BP_version, boot_params, hdr.version); } diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 58fd54eb5577..18f500d185a2 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -51,6 +51,13 @@ struct resource code_resource = { .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; +struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + static struct resource system_rom_resource = { .name = "System ROM", .start = 0xf0000, @@ -254,7 +261,9 @@ static void __init probe_roms(void) * and also for regions reported as reserved by the e820. */ static void __init -legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) +legacy_init_iomem_resources(struct resource *code_resource, + struct resource *data_resource, + struct resource *bss_resource) { int i; @@ -287,6 +296,7 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat */ request_resource(res, code_resource); request_resource(res, data_resource); + request_resource(res, bss_resource); #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) request_resource(res, &crashk_res); @@ -307,9 +317,11 @@ static int __init request_standard_resources(void) printk("Setting up standard PCI resources\n"); if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, &data_resource); + efi_initialize_iomem_resources(&code_resource, + &data_resource, &bss_resource); else - legacy_init_iomem_resources(&code_resource, &data_resource); + legacy_init_iomem_resources(&code_resource, + &data_resource, &bss_resource); /* EFI systems may still have VGA */ request_resource(&iomem_resource, &video_ram_resource); diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 57616865d8a0..04698e0b056c 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -47,7 +47,7 @@ unsigned long end_pfn_map; */ static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; -extern struct resource code_resource, data_resource; +extern struct resource code_resource, data_resource, bss_resource; /* Check for some hardcoded bad areas that early boot is not allowed to touch */ static inline int bad_addr(unsigned long *addrp, unsigned long size) @@ -225,6 +225,7 @@ void __init e820_reserve_resources(void) */ request_resource(res, &code_resource); request_resource(res, &data_resource); + request_resource(res, &bss_resource); #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) request_resource(res, &crashk_res); @@ -729,3 +730,22 @@ __init void e820_setup_gap(void) printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", pci_mem_start, gapstart, gapsize); } + +int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) +{ + int i; + + if (slot < 0 || slot >= e820.nr_map) + return -1; + for (i = slot; i < e820.nr_map; i++) { + if (e820.map[i].type != E820_RAM) + continue; + break; + } + if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) + return -1; + *addr = e820.map[i].addr; + *size = min_t(u64, e820.map[i].size + e820.map[i].addr, + max_pfn << PAGE_SHIFT) - *addr; + return i + 1; +} diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index b42558c48e9d..e2be78f49399 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c @@ -603,7 +603,8 @@ void __init efi_enter_virtual_mode(void) void __init efi_initialize_iomem_resources(struct resource *code_resource, - struct resource *data_resource) + struct resource *data_resource, + struct resource *bss_resource) { struct resource *res; efi_memory_desc_t *md; @@ -675,6 +676,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, if (md->type == EFI_CONVENTIONAL_MEMORY) { request_resource(res, code_resource); request_resource(res, data_resource); + request_resource(res, bss_resource); #ifdef CONFIG_KEXEC request_resource(res, &crashk_res); #endif diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 39677965e161..00b1c2c56454 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -79,22 +79,30 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_ */ .section .text.head,"ax",@progbits ENTRY(startup_32) + /* check to see if KEEP_SEGMENTS flag is meaningful */ + cmpw $0x207, BP_version(%esi) + jb 1f + + /* test KEEP_SEGMENTS flag to see if the bootloader is asking + us to not reload segments */ + testb $(1<<6), BP_loadflags(%esi) + jnz 2f /* * Set segments to known values. */ - cld - lgdt boot_gdt_descr - __PAGE_OFFSET +1: lgdt boot_gdt_descr - __PAGE_OFFSET movl $(__BOOT_DS),%eax movl %eax,%ds movl %eax,%es movl %eax,%fs movl %eax,%gs +2: /* * Clear BSS first so that there are no surprises... - * No need to cld as DF is already clear from cld above... */ + cld xorl %eax,%eax movl $__bss_start - __PAGE_OFFSET,%edi movl $__bss_stop - __PAGE_OFFSET,%ecx @@ -128,6 +136,35 @@ ENTRY(startup_32) movsl 1: +#ifdef CONFIG_PARAVIRT + cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET) + jb default_entry + + /* Paravirt-compatible boot parameters. Look to see what architecture + we're booting under. */ + movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax + cmpl $num_subarch_entries, %eax + jae bad_subarch + + movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax + subl $__PAGE_OFFSET, %eax + jmp *%eax + +bad_subarch: +WEAK(lguest_entry) +WEAK(xen_entry) + /* Unknown implementation; there's really + nothing we can do at this point. */ + ud2a +.data +subarch_entries: + .long default_entry /* normal x86/PC */ + .long lguest_entry /* lguest hypervisor */ + .long xen_entry /* Xen hypervisor */ +num_subarch_entries = (. - subarch_entries) / 4 +.previous +#endif /* CONFIG_PARAVIRT */ + /* * Initialize page tables. This creates a PDE and a set of page * tables, which are located immediately beyond _end. The variable @@ -140,6 +177,7 @@ ENTRY(startup_32) */ page_pde_offset = (__PAGE_OFFSET >> 20); +default_entry: movl $(pg0 - __PAGE_OFFSET), %edi movl $(swapper_pg_dir - __PAGE_OFFSET), %edx movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b3c2d268d708..953328b55a30 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -31,6 +31,7 @@ #include <linux/sysdev.h> #include <linux/msi.h> #include <linux/htirq.h> +#include <linux/dmar.h> #ifdef CONFIG_ACPI #include <acpi/acpi_bus.h> #endif @@ -2031,8 +2032,64 @@ void arch_teardown_msi_irq(unsigned int irq) destroy_irq(irq); } -#endif /* CONFIG_PCI_MSI */ +#ifdef CONFIG_DMAR +#ifdef CONFIG_SMP +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + dmar_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + dmar_msi_write(irq, &msg); + irq_desc[irq].affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .unmask = dmar_msi_unmask, + .mask = dmar_msi_mask, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = dmar_msi_set_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif +#endif /* CONFIG_PCI_MSI */ /* * Hypertransport interrupt support */ diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 5098f58063a5..1a20fe31338b 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -411,8 +411,10 @@ static int calgary_nontranslate_map_sg(struct device* dev, int i; for_each_sg(sg, s, nelems, i) { - BUG_ON(!s->page); - s->dma_address = virt_to_bus(page_address(s->page) +s->offset); + struct page *p = sg_page(s); + + BUG_ON(!p); + s->dma_address = virt_to_bus(sg_virt(s)); s->dma_length = s->length; } return nelems; @@ -432,9 +434,9 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, return calgary_nontranslate_map_sg(dev, sg, nelems, direction); for_each_sg(sg, s, nelems, i) { - BUG_ON(!s->page); + BUG_ON(!sg_page(s)); - vaddr = (unsigned long)page_address(s->page) + s->offset; + vaddr = (unsigned long) sg_virt(s); npages = num_dma_pages(vaddr, s->length); entry = iommu_range_alloc(tbl, npages); diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index afaf9f12c032..393e2725a6e3 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -7,6 +7,7 @@ #include <linux/string.h> #include <linux/pci.h> #include <linux/module.h> +#include <linux/dmar.h> #include <asm/io.h> #include <asm/iommu.h> #include <asm/calgary.h> @@ -305,6 +306,8 @@ void __init pci_iommu_alloc(void) detect_calgary(); #endif + detect_intel_iommu(); + #ifdef CONFIG_SWIOTLB pci_swiotlb_init(); #endif @@ -316,6 +319,8 @@ static int __init pci_iommu_init(void) calgary_iommu_init(); #endif + intel_iommu_init(); + #ifdef CONFIG_IOMMU gart_iommu_init(); #endif diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 5cdfab65e93f..c56e9ee64964 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -302,7 +302,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, #endif for_each_sg(sg, s, nents, i) { - unsigned long addr = page_to_phys(s->page) + s->offset; + unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir); if (addr == bad_dma_address) { @@ -397,7 +397,7 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, start_sg = sgmap = sg; ps = NULL; /* shut up gcc */ for_each_sg(sg, s, nents, i) { - dma_addr_t addr = page_to_phys(s->page) + s->offset; + dma_addr_t addr = sg_phys(s); s->dma_address = addr; BUG_ON(s->length == 0); diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index e85d4360360c..faf70bdca335 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -62,8 +62,8 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) { - BUG_ON(!s->page); - s->dma_address = virt_to_bus(page_address(s->page) +s->offset); + BUG_ON(!sg_page(s)); + s->dma_address = virt_to_bus(sg_virt(s)); if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) return 0; s->dma_length = s->length; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index ba2e165a8a0f..cc0e91447b76 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -60,6 +60,7 @@ #include <asm/vmi.h> #include <setup_arch.h> #include <bios_ebda.h> +#include <asm/cacheflush.h> /* This value is set up by the early boot code to point to the value immediately after the boot time page tables. It contains a *physical* @@ -73,6 +74,7 @@ int disable_pse __devinitdata = 0; */ extern struct resource code_resource; extern struct resource data_resource; +extern struct resource bss_resource; /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; @@ -600,6 +602,8 @@ void __init setup_arch(char **cmdline_p) code_resource.end = virt_to_phys(_etext)-1; data_resource.start = virt_to_phys(_etext); data_resource.end = virt_to_phys(_edata)-1; + bss_resource.start = virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; parse_early_param(); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 31322d42eaae..e7a9e36bd52d 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -58,6 +58,7 @@ #include <asm/numa.h> #include <asm/sections.h> #include <asm/dmi.h> +#include <asm/cacheflush.h> /* * Machine setup.. @@ -133,6 +134,12 @@ struct resource code_resource = { .end = 0, .flags = IORESOURCE_RAM, }; +struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_RAM, +}; #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header @@ -276,6 +283,8 @@ void __init setup_arch(char **cmdline_p) code_resource.end = virt_to_phys(&_etext)-1; data_resource.start = virt_to_phys(&_etext); data_resource.end = virt_to_phys(&_edata)-1; + bss_resource.start = virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; early_identify_cpu(&boot_cpu_data); diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c index c7b7dfe1d405..c40afbaaf93d 100644 --- a/arch/x86/mm/pageattr_64.c +++ b/arch/x86/mm/pageattr_64.c @@ -61,10 +61,10 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, return base; } -static void cache_flush_page(void *adr) +void clflush_cache_range(void *adr, int size) { int i; - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) + for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size) clflush(adr+i); } @@ -80,7 +80,7 @@ static void flush_kernel_map(void *arg) asm volatile("wbinvd" ::: "memory"); else list_for_each_entry(pg, l, lru) { void *adr = page_address(pg); - cache_flush_page(adr); + clflush_cache_range(adr, PAGE_SIZE); } __flush_tlb_all(); } diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index aab25f3ba3ce..c2d24991bb2b 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -750,6 +750,38 @@ config PCI_DOMAINS depends on PCI default y +config DMAR + bool "Support for DMA Remapping Devices (EXPERIMENTAL)" + depends on PCI_MSI && ACPI && EXPERIMENTAL + default y + help + DMA remapping (DMAR) devices support enables independent address + translations for Direct Memory Access (DMA) from devices. + These DMA remapping devices are reported via ACPI tables + and include PCI device scope covered by these DMA + remapping devices. + +config DMAR_GFX_WA + bool "Support for Graphics workaround" + depends on DMAR + default y + help + Current Graphics drivers tend to use physical address + for DMA and avoid using DMA APIs. Setting this config + option permits the IOMMU driver to set a unity map for + all the OS-visible memory. Hence the driver can continue + to use physical addresses for DMA. + +config DMAR_FLOPPY_WA + bool + depends on DMAR + default y + help + Floppy disk drivers are know to bypass DMA API calls + thereby failing to work when IOMMU is enabled. This + workaround will setup a 1:1 mapping for the first + 16M to make floppy (an ISA device) work. + source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 8025d646ab30..61c2e396f2fa 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -1354,8 +1354,9 @@ new_segment: else sg = sg_next(sg); - memset(sg, 0, sizeof(*sg)); - sg->page = bvec->bv_page; + sg_dma_len(sg) = 0; + sg_dma_address(sg) = 0; + sg_set_page(sg, bvec->bv_page); sg->length = nbytes; sg->offset = bvec->bv_offset; nsegs++; @@ -1363,6 +1364,9 @@ new_segment: bvprv = bvec; } /* segments in rq */ + if (sg) + __sg_mark_end(sg); + return nsegs; } diff --git a/crypto/digest.c b/crypto/digest.c index e56de6748b15..8871dec8cae7 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -41,7 +41,7 @@ static int update2(struct hash_desc *desc, return 0; for (;;) { - struct page *pg = sg->page; + struct page *pg = sg_page(sg); unsigned int offset = sg->offset; unsigned int l = sg->length; diff --git a/crypto/hmac.c b/crypto/hmac.c index 8802fb6dd5a6..e4eb6ac53b5c 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -159,7 +159,8 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg, desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; sg_set_buf(sg1, ipad, bs); - sg1[1].page = (void *)sg; + + sg_set_page(&sg[1], (void *) sg); sg1[1].length = 0; sg_set_buf(sg2, opad, bs + ds); diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index d6852c33cfb7..b9bbda0bb9f9 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -54,7 +54,7 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out, if (out) { struct page *page; - page = walk->sg->page + ((walk->offset - 1) >> PAGE_SHIFT); + page = sg_page(walk->sg) + ((walk->offset - 1) >> PAGE_SHIFT); flush_dcache_page(page); } diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h index 9c73e37a42ce..87ed681cceba 100644 --- a/crypto/scatterwalk.h +++ b/crypto/scatterwalk.h @@ -22,13 +22,13 @@ static inline struct scatterlist *scatterwalk_sg_next(struct scatterlist *sg) { - return (++sg)->length ? sg : (void *)sg->page; + return (++sg)->length ? sg : (void *) sg_page(sg); } static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in, struct scatter_walk *walk_out) { - return !(((walk_in->sg->page - walk_out->sg->page) << PAGE_SHIFT) + + return !(((sg_page(walk_in->sg) - sg_page(walk_out->sg)) << PAGE_SHIFT) + (int)(walk_in->offset - walk_out->offset)); } @@ -60,7 +60,7 @@ static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk, static inline struct page *scatterwalk_page(struct scatter_walk *walk) { - return walk->sg->page + (walk->offset >> PAGE_SHIFT); + return sg_page(walk->sg) + (walk->offset >> PAGE_SHIFT); } static inline void scatterwalk_unmap(void *vaddr, int out) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 18d489c8b935..d741c63af42c 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -317,7 +317,7 @@ static void test_cipher(char *algo, int enc, goto out; } - q = kmap(sg[0].page) + sg[0].offset; + q = kmap(sg_page(&sg[0])) + sg[0].offset; hexdump(q, cipher_tv[i].rlen); printk("%s\n", @@ -390,7 +390,7 @@ static void test_cipher(char *algo, int enc, temp = 0; for (k = 0; k < cipher_tv[i].np; k++) { printk("page %u\n", k); - q = kmap(sg[k].page) + sg[k].offset; + q = kmap(sg_page(&sg[k])) + sg[k].offset; hexdump(q, cipher_tv[i].tap[k]); printk("%s\n", memcmp(q, cipher_tv[i].result + temp, diff --git a/crypto/xcbc.c b/crypto/xcbc.c index 9f502b86e0ea..ac68f3b62fde 100644 --- a/crypto/xcbc.c +++ b/crypto/xcbc.c @@ -120,7 +120,7 @@ static int crypto_xcbc_digest_update2(struct hash_desc *pdesc, do { - struct page *pg = sg[i].page; + struct page *pg = sg_page(&sg[i]); unsigned int offset = sg[i].offset; unsigned int slen = sg[i].length; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 629eadbd0ec0..69092bce1ada 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4296,7 +4296,7 @@ void ata_sg_clean(struct ata_queued_cmd *qc) sg_last(sg, qc->orig_n_elem)->length += qc->pad_len; if (pad_buf) { struct scatterlist *psg = &qc->pad_sgent; - void *addr = kmap_atomic(psg->page, KM_IRQ0); + void *addr = kmap_atomic(sg_page(psg), KM_IRQ0); memcpy(addr + psg->offset, pad_buf, qc->pad_len); kunmap_atomic(addr, KM_IRQ0); } @@ -4686,11 +4686,11 @@ static int ata_sg_setup(struct ata_queued_cmd *qc) * data in this function or read data in ata_sg_clean. */ offset = lsg->offset + lsg->length - qc->pad_len; - psg->page = nth_page(lsg->page, offset >> PAGE_SHIFT); + sg_set_page(psg, nth_page(sg_page(lsg), offset >> PAGE_SHIFT)); psg->offset = offset_in_page(offset); if (qc->tf.flags & ATA_TFLAG_WRITE) { - void *addr = kmap_atomic(psg->page, KM_IRQ0); + void *addr = kmap_atomic(sg_page(psg), KM_IRQ0); memcpy(pad_buf, addr + psg->offset, qc->pad_len); kunmap_atomic(addr, KM_IRQ0); } @@ -4836,7 +4836,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) if (qc->curbytes == qc->nbytes - qc->sect_size) ap->hsm_task_state = HSM_ST_LAST; - page = qc->cursg->page; + page = sg_page(qc->cursg); offset = qc->cursg->offset + qc->cursg_ofs; /* get the current page and offset */ @@ -4988,7 +4988,7 @@ next_sg: sg = qc->cursg; - page = sg->page; + page = sg_page(sg); offset = sg->offset + qc->cursg_ofs; /* get the current page and offset */ diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 9fbb39cd0f58..5b758b9ad0b8 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1544,7 +1544,7 @@ static unsigned int ata_scsi_rbuf_get(struct scsi_cmnd *cmd, u8 **buf_out) struct scatterlist *sg = scsi_sglist(cmd); if (sg) { - buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; buflen = sg->length; } else { buf = NULL; diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c41d0728efe2..7868707c7eda 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -137,7 +137,7 @@ static ssize_t show_mem_state(struct sys_device *dev, char *buf) return len; } -static inline int memory_notify(unsigned long val, void *v) +int memory_notify(unsigned long val, void *v) { return blocking_notifier_call_chain(&memory_chain, val, v); } @@ -183,7 +183,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) break; case MEM_OFFLINE: mem->state = MEM_GOING_OFFLINE; - memory_notify(MEM_GOING_OFFLINE, NULL); start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; ret = remove_memory(start_paddr, PAGES_PER_SECTION << PAGE_SHIFT); @@ -191,7 +190,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) mem->state = old_state; break; } - memory_notify(MEM_MAPPING_INVALID, NULL); break; default: printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", @@ -199,11 +197,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) WARN_ON(1); ret = -EINVAL; } - /* - * For now, only notify on successful memory operations - */ - if (!ret) - memory_notify(action, NULL); return ret; } diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 84d6aa500e26..53505422867c 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -345,6 +345,7 @@ static bool DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller) Command->V1.ScatterGatherList = (DAC960_V1_ScatterGatherSegment_T *)ScatterGatherCPU; Command->V1.ScatterGatherListDMA = ScatterGatherDMA; + sg_init_table(Command->cmd_sglist, DAC960_V1_ScatterGatherLimit); } else { Command->cmd_sglist = Command->V2.ScatterList; Command->V2.ScatterGatherList = @@ -353,6 +354,7 @@ static bool DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller) Command->V2.RequestSense = (DAC960_SCSI_RequestSense_T *)RequestSenseCPU; Command->V2.RequestSenseDMA = RequestSenseDMA; + sg_init_table(Command->cmd_sglist, DAC960_V2_ScatterGatherLimit); } } return true; diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 7c2cfde08f18..5a6fe17fc638 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2610,7 +2610,7 @@ static void do_cciss_request(struct request_queue *q) (int)creq->nr_sectors); #endif /* CCISS_DEBUG */ - memset(tmp_sg, 0, sizeof(tmp_sg)); + sg_init_table(tmp_sg, MAXSGENTRIES); seg = blk_rq_map_sg(q, creq, tmp_sg); /* get the DMA records for the setup */ @@ -2621,7 +2621,7 @@ static void do_cciss_request(struct request_queue *q) for (i = 0; i < seg; i++) { c->SG[i].Len = tmp_sg[i].length; - temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page, + temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]), tmp_sg[i].offset, tmp_sg[i].length, dir); c->SG[i].Addr.lower = temp64.val32.lower; diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 568603d3043e..efab27fa1083 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -918,6 +918,7 @@ queue_next: DBGPX( printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors); ); + sg_init_table(tmp_sg, SG_MAX); seg = blk_rq_map_sg(q, creq, tmp_sg); /* Now do all the DMA Mappings */ @@ -929,7 +930,7 @@ DBGPX( { c->req.sg[i].size = tmp_sg[i].length; c->req.sg[i].addr = (__u32) pci_map_page(h->pci_dev, - tmp_sg[i].page, + sg_page(&tmp_sg[i]), tmp_sg[i].offset, tmp_sg[i].length, dir); } diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c index 40535036e893..1b58b010797f 100644 --- a/drivers/block/cryptoloop.c +++ b/drivers/block/cryptoloop.c @@ -26,6 +26,7 @@ #include <linux/crypto.h> #include <linux/blkdev.h> #include <linux/loop.h> +#include <linux/scatterlist.h> #include <asm/semaphore.h> #include <asm/uaccess.h> @@ -119,14 +120,17 @@ cryptoloop_transfer(struct loop_device *lo, int cmd, .tfm = tfm, .flags = CRYPTO_TFM_REQ_MAY_SLEEP, }; - struct scatterlist sg_out = { NULL, }; - struct scatterlist sg_in = { NULL, }; + struct scatterlist sg_out; + struct scatterlist sg_in; encdec_cbc_t encdecfunc; struct page *in_page, *out_page; unsigned in_offs, out_offs; int err; + sg_init_table(&sg_out, 1); + sg_init_table(&sg_in, 1); + if (cmd == READ) { in_page = raw_page; in_offs = raw_off; @@ -146,11 +150,11 @@ cryptoloop_transfer(struct loop_device *lo, int cmd, u32 iv[4] = { 0, }; iv[0] = cpu_to_le32(IV & 0xffffffff); - sg_in.page = in_page; + sg_set_page(&sg_in, in_page); sg_in.offset = in_offs; sg_in.length = sz; - sg_out.page = out_page; + sg_set_page(&sg_out, out_page); sg_out.offset = out_offs; sg_out.length = sz; diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 317a790c153b..7276f7d207c2 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -388,6 +388,7 @@ static int __send_request(struct request *req) op = VD_OP_BWRITE; } + sg_init_table(sg, port->ring_cookies); nsg = blk_rq_map_sg(req->q, req, sg); len = 0; diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 402209fec59a..282a69558e8a 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -522,6 +522,7 @@ static struct carm_request *carm_get_request(struct carm_host *host) host->n_msgs++; assert(host->n_msgs <= CARM_MAX_REQ); + sg_init_table(crq->sg, CARM_MAX_REQ_SG); return crq; } diff --git a/drivers/block/ub.c b/drivers/block/ub.c index c57dd2b3a0c8..14143f2c484d 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -25,6 +25,7 @@ #include <linux/usb_usual.h> #include <linux/blkdev.h> #include <linux/timer.h> +#include <linux/scatterlist.h> #include <scsi/scsi.h> #define DRV_NAME "ub" @@ -656,6 +657,7 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq) if ((cmd = ub_get_cmd(lun)) == NULL) return -1; memset(cmd, 0, sizeof(struct ub_scsi_cmd)); + sg_init_table(cmd->sgv, UB_MAX_REQ_SG); blkdev_dequeue_request(rq); @@ -1309,9 +1311,8 @@ static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd) else pipe = sc->send_bulk_pipe; sc->last_pipe = pipe; - usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe, - page_address(sg->page) + sg->offset, sg->length, - ub_urb_complete, sc); + usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe, sg_virt(sg), + sg->length, ub_urb_complete, sc); sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; @@ -1427,7 +1428,7 @@ static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd) scmd->state = UB_CMDST_INIT; scmd->nsg = 1; sg = &scmd->sgv[0]; - sg->page = virt_to_page(sc->top_sense); + sg_set_page(sg, virt_to_page(sc->top_sense)); sg->offset = (unsigned long)sc->top_sense & (PAGE_SIZE-1); sg->length = UB_SENSE_SIZE; scmd->len = UB_SENSE_SIZE; @@ -1863,7 +1864,7 @@ static int ub_sync_read_cap(struct ub_dev *sc, struct ub_lun *lun, cmd->state = UB_CMDST_INIT; cmd->nsg = 1; sg = &cmd->sgv[0]; - sg->page = virt_to_page(p); + sg_set_page(sg, virt_to_page(p)); sg->offset = (unsigned long)p & (PAGE_SIZE-1); sg->length = 8; cmd->len = 8; diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index e824b672e05a..ab5d404faa11 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -41,6 +41,7 @@ #include <linux/dma-mapping.h> #include <linux/completion.h> #include <linux/device.h> +#include <linux/scatterlist.h> #include <asm/uaccess.h> #include <asm/vio.h> @@ -270,6 +271,7 @@ static int send_request(struct request *req) d = req->rq_disk->private_data; /* Now build the scatter-gather list */ + sg_init_table(sg, VIOMAXBLOCKDMA); nsg = blk_rq_map_sg(req->q, req, sg); nsg = dma_map_sg(d->dev, sg, nsg, direction); diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index 2f307c4df335..67588326ae56 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -606,7 +606,7 @@ static int at_context_queue_packet(struct context *ctx, struct fw_packet *packet) { struct fw_ohci *ohci = ctx->ohci; - dma_addr_t d_bus, payload_bus; + dma_addr_t d_bus, uninitialized_var(payload_bus); struct driver_data *driver_data; struct descriptor *d, *last; __le32 *header; @@ -1459,7 +1459,7 @@ ohci_allocate_iso_context(struct fw_card *card, int type, size_t header_size) /* FIXME: We need a fallback for pre 1.1 OHCI. */ if (callback == handle_ir_dualbuffer_packet && ohci->version < OHCI_VERSION_1_1) - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENOSYS); spin_lock_irqsave(&ohci->lock, flags); index = ffs(*mask) - 1; @@ -1778,7 +1778,7 @@ ohci_queue_iso(struct fw_iso_context *base, buffer, payload); else /* FIXME: Implement fallback for OHCI 1.0 controllers. */ - return -EINVAL; + return -ENOSYS; } static const struct fw_card_driver ohci_driver = { @@ -1898,7 +1898,12 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) ohci->version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff; fw_notify("Added fw-ohci device %s, OHCI version %x.%x\n", dev->dev.bus_id, ohci->version >> 16, ohci->version & 0xff); - + if (ohci->version < OHCI_VERSION_1_1) { + fw_notify(" Isochronous I/O is not yet implemented for " + "OHCI 1.0 chips.\n"); + fw_notify(" Cameras, audio devices etc. won't work on " + "this controller with this driver version.\n"); + } return 0; fail_self_id: diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c index ff20377b4c82..e196aefa2070 100644 --- a/drivers/ide/cris/ide-cris.c +++ b/drivers/ide/cris/ide-cris.c @@ -935,11 +935,11 @@ static int cris_ide_build_dmatable (ide_drive_t *drive) * than two possibly non-adjacent physical 4kB pages. */ /* group sequential buffers into one large buffer */ - addr = page_to_phys(sg->page) + sg->offset; + addr = sg_phys(sg); size = sg_dma_len(sg); while (--i) { sg = sg_next(sg); - if ((addr + size) != page_to_phys(sg->page) + sg->offset) + if ((addr + size) != sg_phys(sg)) break; size += sg_dma_len(sg); } diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index d5146c57e5b3..ec55a173c08a 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1317,12 +1317,14 @@ static int hwif_init(ide_hwif_t *hwif) if (!hwif->sg_max_nents) hwif->sg_max_nents = PRD_ENTRIES; - hwif->sg_table = kzalloc(sizeof(struct scatterlist)*hwif->sg_max_nents, + hwif->sg_table = kmalloc(sizeof(struct scatterlist)*hwif->sg_max_nents, GFP_KERNEL); if (!hwif->sg_table) { printk(KERN_ERR "%s: unable to allocate SG table.\n", hwif->name); goto out; } + + sg_init_table(hwif->sg_table, hwif->sg_max_nents); if (init_irq(hwif) == 0) goto done; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 73ef6bf5fbcc..d066546f2831 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -261,7 +261,7 @@ static void ide_pio_sector(ide_drive_t *drive, unsigned int write) hwif->cursg = sg; } - page = cursg->page; + page = sg_page(cursg); offset = cursg->offset + hwif->cursg_ofs * SECTOR_SIZE; /* get the current page and offset */ diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c index 1de58566e5b6..a4ce3ba15d61 100644 --- a/drivers/ide/mips/au1xxx-ide.c +++ b/drivers/ide/mips/au1xxx-ide.c @@ -276,8 +276,7 @@ static int auide_build_dmatable(ide_drive_t *drive) if (iswrite) { if(!put_source_flags(ahwif->tx_chan, - (void*)(page_address(sg->page) - + sg->offset), + (void*) sg_virt(sg), tc, flags)) { printk(KERN_ERR "%s failed %d\n", __FUNCTION__, __LINE__); @@ -285,8 +284,7 @@ static int auide_build_dmatable(ide_drive_t *drive) } else { if(!put_dest_flags(ahwif->rx_chan, - (void*)(page_address(sg->page) - + sg->offset), + (void*) sg_virt(sg), tc, flags)) { printk(KERN_ERR "%s failed %d\n", __FUNCTION__, __LINE__); diff --git a/drivers/ieee1394/dma.c b/drivers/ieee1394/dma.c index 45d605581922..25e113b50d86 100644 --- a/drivers/ieee1394/dma.c +++ b/drivers/ieee1394/dma.c @@ -111,7 +111,7 @@ int dma_region_alloc(struct dma_region *dma, unsigned long n_bytes, unsigned long va = (unsigned long)dma->kvirt + (i << PAGE_SHIFT); - dma->sglist[i].page = vmalloc_to_page((void *)va); + sg_set_page(&dma->sglist[i], vmalloc_to_page((void *)va)); dma->sglist[i].length = PAGE_SIZE; } diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c index 1b353b964b33..d5dfe11aa5c6 100644 --- a/drivers/ieee1394/sbp2.c +++ b/drivers/ieee1394/sbp2.c @@ -1466,7 +1466,7 @@ static void sbp2_prep_command_orb_sg(struct sbp2_command_orb *orb, cmd->dma_size = sgpnt[0].length; cmd->dma_type = CMD_DMA_PAGE; cmd->cmd_dma = dma_map_page(hi->host->device.parent, - sgpnt[0].page, sgpnt[0].offset, + sg_page(&sgpnt[0]), sgpnt[0].offset, cmd->dma_size, cmd->dma_dir); orb->data_descriptor_lo = cmd->cmd_dma; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 2f54e29dc7a6..14159ff29408 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -55,9 +55,11 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d ib_dma_unmap_sg(dev, chunk->page_list, chunk->nents, DMA_BIDIRECTIONAL); for (i = 0; i < chunk->nents; ++i) { + struct page *page = sg_page(&chunk->page_list[i]); + if (umem->writable && dirty) - set_page_dirty_lock(chunk->page_list[i].page); - put_page(chunk->page_list[i].page); + set_page_dirty_lock(page); + put_page(page); } kfree(chunk); @@ -164,11 +166,12 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, } chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK); + sg_init_table(chunk->page_list, chunk->nents); for (i = 0; i < chunk->nents; ++i) { if (vma_list && !is_vm_hugetlb_page(vma_list[i + off])) umem->hugetlb = 0; - chunk->page_list[i].page = page_list[i + off]; + sg_set_page(&chunk->page_list[i], page_list[i + off]); chunk->page_list[i].offset = 0; chunk->page_list[i].length = PAGE_SIZE; } @@ -179,7 +182,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, DMA_BIDIRECTIONAL); if (chunk->nmap <= 0) { for (i = 0; i < chunk->nents; ++i) - put_page(chunk->page_list[i].page); + put_page(sg_page(&chunk->page_list[i])); kfree(chunk); ret = -ENOMEM; diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c index 22709a4f8fc8..e90a0ea538a0 100644 --- a/drivers/infiniband/hw/ipath/ipath_dma.c +++ b/drivers/infiniband/hw/ipath/ipath_dma.c @@ -108,7 +108,7 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl, BUG_ON(!valid_dma_direction(direction)); for_each_sg(sgl, sg, nents, i) { - addr = (u64) page_address(sg->page); + addr = (u64) page_address(sg_page(sg)); /* TODO: handle highmem pages */ if (!addr) { ret = 0; @@ -127,7 +127,7 @@ static void ipath_unmap_sg(struct ib_device *dev, static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) { - u64 addr = (u64) page_address(sg->page); + u64 addr = (u64) page_address(sg_page(sg)); if (addr) addr += sg->offset; diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index e442470a2375..db4ba92f79fc 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -225,7 +225,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, for (i = 0; i < chunk->nents; i++) { void *vaddr; - vaddr = page_address(chunk->page_list[i].page); + vaddr = page_address(sg_page(&chunk->page_list[i])); if (!vaddr) { ret = ERR_PTR(-EINVAL); goto bail; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index e61f3e626980..007b38157fc4 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -71,7 +71,7 @@ static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk * PCI_DMA_BIDIRECTIONAL); for (i = 0; i < chunk->npages; ++i) - __free_pages(chunk->mem[i].page, + __free_pages(sg_page(&chunk->mem[i]), get_order(chunk->mem[i].length)); } @@ -81,7 +81,7 @@ static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chun for (i = 0; i < chunk->npages; ++i) { dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length, - lowmem_page_address(chunk->mem[i].page), + lowmem_page_address(sg_page(&chunk->mem[i])), sg_dma_address(&chunk->mem[i])); } } @@ -107,10 +107,13 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent) static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask) { - mem->page = alloc_pages(gfp_mask, order); - if (!mem->page) + struct page *page; + + page = alloc_pages(gfp_mask, order); + if (!page) return -ENOMEM; + sg_set_page(mem, page); mem->length = PAGE_SIZE << order; mem->offset = 0; return 0; @@ -157,6 +160,7 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, if (!chunk) goto fail; + sg_init_table(chunk->mem, MTHCA_ICM_CHUNK_LEN); chunk->npages = 0; chunk->nsg = 0; list_add_tail(&chunk->list, &icm->chunk_list); @@ -304,7 +308,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_h * so if we found the page, dma_handle has already * been assigned to. */ if (chunk->mem[i].length > offset) { - page = chunk->mem[i].page; + page = sg_page(&chunk->mem[i]); goto out; } offset -= chunk->mem[i].length; @@ -445,6 +449,7 @@ static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int pag int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, struct mthca_user_db_table *db_tab, int index, u64 uaddr) { + struct page *pages[1]; int ret = 0; u8 status; int i; @@ -472,16 +477,17 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, } ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0, - &db_tab->page[i].mem.page, NULL); + pages, NULL); if (ret < 0) goto out; + sg_set_page(&db_tab->page[i].mem, pages[0]); db_tab->page[i].mem.length = MTHCA_ICM_PAGE_SIZE; db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK; ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); if (ret < 0) { - put_page(db_tab->page[i].mem.page); + put_page(pages[0]); goto out; } @@ -491,7 +497,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, ret = -EINVAL; if (ret) { pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); - put_page(db_tab->page[i].mem.page); + put_page(sg_page(&db_tab->page[i].mem)); goto out; } @@ -557,7 +563,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, if (db_tab->page[i].uvirt) { mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status); pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); - put_page(db_tab->page[i].mem.page); + put_page(sg_page(&db_tab->page[i].mem)); } } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index f3529b6f0a33..d68798061795 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -131,7 +131,7 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, p = mem; for_each_sg(sgl, sg, data->size, i) { - from = kmap_atomic(sg->page, KM_USER0); + from = kmap_atomic(sg_page(sg), KM_USER0); memcpy(p, from + sg->offset, sg->length); @@ -191,7 +191,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, p = mem; for_each_sg(sgl, sg, sg_size, i) { - to = kmap_atomic(sg->page, KM_SOFTIRQ0); + to = kmap_atomic(sg_page(sg), KM_SOFTIRQ0); memcpy(to + sg->offset, p, sg->length); @@ -300,7 +300,7 @@ static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data, for_each_sg(sgl, sg, data->dma_nents, i) { /* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX " "offset: %ld sz: %ld\n", i, - (unsigned long)page_to_phys(sg->page), + (unsigned long)sg_phys(sg), (unsigned long)sg->offset, (unsigned long)sg->length); */ end_addr = ib_sg_dma_address(ibdev, sg) + @@ -336,7 +336,7 @@ static void iser_data_buf_dump(struct iser_data_buf *data, iser_err("sg[%d] dma_addr:0x%lX page:0x%p " "off:0x%x sz:0x%x dma_len:0x%x\n", i, (unsigned long)ib_sg_dma_address(ibdev, sg), - sg->page, sg->offset, + sg_page(sg), sg->offset, sg->length, ib_sg_dma_len(ibdev, sg)); } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0eb5416798bd..ac54f697c508 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -348,16 +348,17 @@ static int crypt_convert(struct crypt_config *cc, ctx->idx_out < ctx->bio_out->bi_vcnt) { struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in); struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); - struct scatterlist sg_in = { - .page = bv_in->bv_page, - .offset = bv_in->bv_offset + ctx->offset_in, - .length = 1 << SECTOR_SHIFT - }; - struct scatterlist sg_out = { - .page = bv_out->bv_page, - .offset = bv_out->bv_offset + ctx->offset_out, - .length = 1 << SECTOR_SHIFT - }; + struct scatterlist sg_in, sg_out; + + sg_init_table(&sg_in, 1); + sg_set_page(&sg_in, bv_in->bv_page); + sg_in.offset = bv_in->bv_offset + ctx->offset_in; + sg_in.length = 1 << SECTOR_SHIFT; + + sg_init_table(&sg_out, 1); + sg_set_page(&sg_out, bv_out->bv_page); + sg_out.offset = bv_out->bv_offset + ctx->offset_out; + sg_out.length = 1 << SECTOR_SHIFT; ctx->offset_in += sg_in.length; if (ctx->offset_in >= bv_in->bv_len) { diff --git a/drivers/media/common/saa7146_core.c b/drivers/media/common/saa7146_core.c index 365a22118a09..2b1f8b4be00a 100644 --- a/drivers/media/common/saa7146_core.c +++ b/drivers/media/common/saa7146_core.c @@ -112,12 +112,13 @@ static struct scatterlist* vmalloc_to_sg(unsigned char *virt, int nr_pages) sglist = kcalloc(nr_pages, sizeof(struct scatterlist), GFP_KERNEL); if (NULL == sglist) return NULL; + sg_init_table(sglist, nr_pages); for (i = 0; i < nr_pages; i++, virt += PAGE_SIZE) { pg = vmalloc_to_page(virt); if (NULL == pg) goto err; BUG_ON(PageHighMem(pg)); - sglist[i].page = pg; + sg_set_page(&sglist[i], pg); sglist[i].length = PAGE_SIZE; } return sglist; diff --git a/drivers/media/video/ivtv/ivtv-udma.c b/drivers/media/video/ivtv/ivtv-udma.c index c4626d1cdf41..912b424e5204 100644 --- a/drivers/media/video/ivtv/ivtv-udma.c +++ b/drivers/media/video/ivtv/ivtv-udma.c @@ -63,10 +63,10 @@ int ivtv_udma_fill_sg_list (struct ivtv_user_dma *dma, struct ivtv_dma_page_info memcpy(page_address(dma->bouncemap[map_offset]) + offset, src, len); kunmap_atomic(src, KM_BOUNCE_READ); local_irq_restore(flags); - dma->SGlist[map_offset].page = dma->bouncemap[map_offset]; + sg_set_page(&dma->SGlist[map_offset], dma->bouncemap[map_offset]); } else { - dma->SGlist[map_offset].page = dma->map[map_offset]; + sg_set_page(&dma->SGlist[map_offset], dma->map[map_offset]); } offset = 0; map_offset++; diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c index 3eb6123227b2..0a18286279d3 100644 --- a/drivers/media/video/videobuf-dma-sg.c +++ b/drivers/media/video/videobuf-dma-sg.c @@ -60,12 +60,13 @@ videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages) sglist = kcalloc(nr_pages, sizeof(struct scatterlist), GFP_KERNEL); if (NULL == sglist) return NULL; + sg_init_table(sglist, nr_pages); for (i = 0; i < nr_pages; i++, virt += PAGE_SIZE) { pg = vmalloc_to_page(virt); if (NULL == pg) goto err; BUG_ON(PageHighMem(pg)); - sglist[i].page = pg; + sg_set_page(&sglist[i], pg); sglist[i].length = PAGE_SIZE; } return sglist; @@ -86,13 +87,14 @@ videobuf_pages_to_sg(struct page **pages, int nr_pages, int offset) sglist = kcalloc(nr_pages, sizeof(*sglist), GFP_KERNEL); if (NULL == sglist) return NULL; + sg_init_table(sglist, nr_pages); if (NULL == pages[0]) goto nopage; if (PageHighMem(pages[0])) /* DMA to highmem pages might not work */ goto highmem; - sglist[0].page = pages[0]; + sg_set_page(&sglist[0], pages[0]); sglist[0].offset = offset; sglist[0].length = PAGE_SIZE - offset; for (i = 1; i < nr_pages; i++) { @@ -100,7 +102,7 @@ videobuf_pages_to_sg(struct page **pages, int nr_pages, int offset) goto nopage; if (PageHighMem(pages[i])) goto highmem; - sglist[i].page = pages[i]; + sg_set_page(&sglist[i], pages[i]); sglist[i].length = PAGE_SIZE; } return sglist; diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index a5d0354bbbda..9203a0b221b3 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -13,6 +13,7 @@ #include <linux/blkdev.h> #include <linux/freezer.h> #include <linux/kthread.h> +#include <linux/scatterlist.h> #include <linux/mmc/card.h> #include <linux/mmc/host.h> @@ -153,19 +154,21 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock blk_queue_max_hw_segments(mq->queue, bouncesz / 512); blk_queue_max_segment_size(mq->queue, bouncesz); - mq->sg = kzalloc(sizeof(struct scatterlist), + mq->sg = kmalloc(sizeof(struct scatterlist), GFP_KERNEL); if (!mq->sg) { ret = -ENOMEM; goto cleanup_queue; } + sg_init_table(mq->sg, 1); - mq->bounce_sg = kzalloc(sizeof(struct scatterlist) * + mq->bounce_sg = kmalloc(sizeof(struct scatterlist) * bouncesz / 512, GFP_KERNEL); if (!mq->bounce_sg) { ret = -ENOMEM; goto cleanup_queue; } + sg_init_table(mq->bounce_sg, bouncesz / 512); } } #endif @@ -302,12 +305,12 @@ static void copy_sg(struct scatterlist *dst, unsigned int dst_len, BUG_ON(dst_len == 0); if (dst_size == 0) { - dst_buf = page_address(dst->page) + dst->offset; + dst_buf = sg_virt(dst); dst_size = dst->length; } if (src_size == 0) { - src_buf = page_address(src->page) + src->offset; + src_buf = sg_virt(dst); src_size = src->length; } @@ -353,9 +356,7 @@ unsigned int mmc_queue_map_sg(struct mmc_queue *mq) return 1; } - mq->sg[0].page = virt_to_page(mq->bounce_buf); - mq->sg[0].offset = offset_in_page(mq->bounce_buf); - mq->sg[0].length = 0; + sg_init_one(mq->sg, mq->bounce_buf, 0); while (sg_len) { mq->sg[0].length += mq->bounce_sg[sg_len - 1].length; diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index 7a452c2ad1f9..b1edcefdd4f9 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -149,7 +149,7 @@ static inline void at91_mci_sg_to_dma(struct at91mci_host *host, struct mmc_data sg = &data->sg[i]; - sgbuffer = kmap_atomic(sg->page, KM_BIO_SRC_IRQ) + sg->offset; + sgbuffer = kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset; amount = min(size, sg->length); size -= amount; @@ -226,7 +226,7 @@ static void at91_mci_pre_dma_read(struct at91mci_host *host) sg = &data->sg[host->transfer_index++]; pr_debug("sg = %p\n", sg); - sg->dma_address = dma_map_page(NULL, sg->page, sg->offset, sg->length, DMA_FROM_DEVICE); + sg->dma_address = dma_map_page(NULL, sg_page(sg), sg->offset, sg->length, DMA_FROM_DEVICE); pr_debug("dma address = %08X, length = %d\n", sg->dma_address, sg->length); @@ -283,7 +283,7 @@ static void at91_mci_post_dma_read(struct at91mci_host *host) int index; /* Swap the contents of the buffer */ - buffer = kmap_atomic(sg->page, KM_BIO_SRC_IRQ) + sg->offset; + buffer = kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset; pr_debug("buffer = %p, length = %d\n", buffer, sg->length); for (index = 0; index < (sg->length / 4); index++) @@ -292,7 +292,7 @@ static void at91_mci_post_dma_read(struct at91mci_host *host) kunmap_atomic(buffer, KM_BIO_SRC_IRQ); } - flush_dcache_page(sg->page); + flush_dcache_page(sg_page(sg)); } /* Is there another transfer to trigger? */ diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c index 92c4d0dfee43..bcbb6d247bf7 100644 --- a/drivers/mmc/host/au1xmmc.c +++ b/drivers/mmc/host/au1xmmc.c @@ -340,7 +340,7 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host) /* This is the pointer to the data buffer */ sg = &data->sg[host->pio.index]; - sg_ptr = page_address(sg->page) + sg->offset + host->pio.offset; + sg_ptr = sg_virt(sg) + host->pio.offset; /* This is the space left inside the buffer */ sg_len = data->sg[host->pio.index].length - host->pio.offset; @@ -400,7 +400,7 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host) if (host->pio.index < host->dma.len) { sg = &data->sg[host->pio.index]; - sg_ptr = page_address(sg->page) + sg->offset + host->pio.offset; + sg_ptr = sg_virt(sg) + host->pio.offset; /* This is the space left inside the buffer */ sg_len = sg_dma_len(&data->sg[host->pio.index]) - host->pio.offset; @@ -613,14 +613,11 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) if (host->flags & HOST_F_XMIT){ ret = au1xxx_dbdma_put_source_flags(channel, - (void *) (page_address(sg->page) + - sg->offset), - len, flags); + (void *) sg_virt(sg), len, flags); } else { ret = au1xxx_dbdma_put_dest_flags(channel, - (void *) (page_address(sg->page) + - sg->offset), + (void *) sg_virt(sg), len, flags); } diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c index 6ebc41e7592c..fc72e1fadb6a 100644 --- a/drivers/mmc/host/imxmmc.c +++ b/drivers/mmc/host/imxmmc.c @@ -262,7 +262,7 @@ static void imxmci_setup_data(struct imxmci_host *host, struct mmc_data *data) } /* Convert back to virtual address */ - host->data_ptr = (u16*)(page_address(data->sg->page) + data->sg->offset); + host->data_ptr = (u16*)sg_virt(sg); host->data_cnt = 0; clear_bit(IMXMCI_PEND_DMA_DATA_b, &host->pending_events); diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 7ae18eaed6c5..12c2d807c145 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -813,7 +813,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, && dir == DMA_FROM_DEVICE) dir = DMA_BIDIRECTIONAL; - dma_addr = dma_map_page(dma_dev, sg->page, 0, + dma_addr = dma_map_page(dma_dev, sg_page(sg), 0, PAGE_SIZE, dir); if (direction == DMA_TO_DEVICE) t->tx_dma = dma_addr + sg->offset; @@ -822,7 +822,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, } /* allow pio too; we don't allow highmem */ - kmap_addr = kmap(sg->page); + kmap_addr = kmap(sg_page(sg)); if (direction == DMA_TO_DEVICE) t->tx_buf = kmap_addr + sg->offset; else @@ -855,8 +855,8 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, /* discard mappings */ if (direction == DMA_FROM_DEVICE) - flush_kernel_dcache_page(sg->page); - kunmap(sg->page); + flush_kernel_dcache_page(sg_page(sg)); + kunmap(sg_page(sg)); if (dma_dev) dma_unmap_page(dma_dev, dma_addr, PAGE_SIZE, dir); diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index 60a67dfcda6a..971e18b91f4a 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -24,10 +24,10 @@ #include <linux/mmc/host.h> #include <linux/mmc/card.h> #include <linux/clk.h> +#include <linux/scatterlist.h> #include <asm/io.h> #include <asm/irq.h> -#include <asm/scatterlist.h> #include <asm/mach-types.h> #include <asm/arch/board.h> @@ -383,7 +383,7 @@ mmc_omap_sg_to_buf(struct mmc_omap_host *host) sg = host->data->sg + host->sg_idx; host->buffer_bytes_left = sg->length; - host->buffer = page_address(sg->page) + sg->offset; + host->buffer = sg_virt(sg); if (host->buffer_bytes_left > host->total_bytes_left) host->buffer_bytes_left = host->total_bytes_left; } diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index b397121b947d..0db837e44b77 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -231,7 +231,7 @@ static void sdhci_deactivate_led(struct sdhci_host *host) static inline char* sdhci_sg_to_buffer(struct sdhci_host* host) { - return page_address(host->cur_sg->page) + host->cur_sg->offset; + return sg_virt(host->cur_sg); } static inline int sdhci_next_sg(struct sdhci_host* host) diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c index 9b904795eb77..c11a3d256051 100644 --- a/drivers/mmc/host/tifm_sd.c +++ b/drivers/mmc/host/tifm_sd.c @@ -192,7 +192,7 @@ static void tifm_sd_transfer_data(struct tifm_sd *host) } off = sg[host->sg_pos].offset + host->block_pos; - pg = nth_page(sg[host->sg_pos].page, off >> PAGE_SHIFT); + pg = nth_page(sg_page(&sg[host->sg_pos]), off >> PAGE_SHIFT); p_off = offset_in_page(off); p_cnt = PAGE_SIZE - p_off; p_cnt = min(p_cnt, cnt); @@ -241,18 +241,18 @@ static void tifm_sd_bounce_block(struct tifm_sd *host, struct mmc_data *r_data) } off = sg[host->sg_pos].offset + host->block_pos; - pg = nth_page(sg[host->sg_pos].page, off >> PAGE_SHIFT); + pg = nth_page(sg_page(&sg[host->sg_pos]), off >> PAGE_SHIFT); p_off = offset_in_page(off); p_cnt = PAGE_SIZE - p_off; p_cnt = min(p_cnt, cnt); p_cnt = min(p_cnt, t_size); if (r_data->flags & MMC_DATA_WRITE) - tifm_sd_copy_page(host->bounce_buf.page, + tifm_sd_copy_page(sg_page(&host->bounce_buf), r_data->blksz - t_size, pg, p_off, p_cnt); else if (r_data->flags & MMC_DATA_READ) - tifm_sd_copy_page(pg, p_off, host->bounce_buf.page, + tifm_sd_copy_page(pg, p_off, sg_page(&host->bounce_buf), r_data->blksz - t_size, p_cnt); t_size -= p_cnt; diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c index 80db11c05f2a..fa4c8c53cc7a 100644 --- a/drivers/mmc/host/wbsd.c +++ b/drivers/mmc/host/wbsd.c @@ -269,7 +269,7 @@ static inline int wbsd_next_sg(struct wbsd_host *host) static inline char *wbsd_sg_to_buffer(struct wbsd_host *host) { - return page_address(host->cur_sg->page) + host->cur_sg->offset; + return sg_virt(host->cur_sg); } static inline void wbsd_sg_to_dma(struct wbsd_host *host, struct mmc_data *data) @@ -283,7 +283,7 @@ static inline void wbsd_sg_to_dma(struct wbsd_host *host, struct mmc_data *data) len = data->sg_len; for (i = 0; i < len; i++) { - sgbuf = page_address(sg[i].page) + sg[i].offset; + sgbuf = sg_virt(&sg[i]); memcpy(dmabuf, sgbuf, sg[i].length); dmabuf += sg[i].length; } @@ -300,7 +300,7 @@ static inline void wbsd_dma_to_sg(struct wbsd_host *host, struct mmc_data *data) len = data->sg_len; for (i = 0; i < len; i++) { - sgbuf = page_address(sg[i].page) + sg[i].offset; + sgbuf = sg_virt(&sg[i]); memcpy(sgbuf, dmabuf, sg[i].length); dmabuf += sg[i].length; } diff --git a/drivers/net/mlx4/icm.c b/drivers/net/mlx4/icm.c index 4b3c109d5eae..887633b207d9 100644 --- a/drivers/net/mlx4/icm.c +++ b/drivers/net/mlx4/icm.c @@ -60,7 +60,7 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu PCI_DMA_BIDIRECTIONAL); for (i = 0; i < chunk->npages; ++i) - __free_pages(chunk->mem[i].page, + __free_pages(sg_page(&chunk->mem[i]), get_order(chunk->mem[i].length)); } @@ -70,7 +70,7 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk * for (i = 0; i < chunk->npages; ++i) dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length, - lowmem_page_address(chunk->mem[i].page), + lowmem_page_address(sg_page(&chunk->mem[i])), sg_dma_address(&chunk->mem[i])); } @@ -95,10 +95,13 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent) static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask) { - mem->page = alloc_pages(gfp_mask, order); - if (!mem->page) + struct page *page; + + page = alloc_pages(gfp_mask, order); + if (!page) return -ENOMEM; + sg_set_page(mem, page); mem->length = PAGE_SIZE << order; mem->offset = 0; return 0; @@ -145,6 +148,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, if (!chunk) goto fail; + sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN); chunk->npages = 0; chunk->nsg = 0; list_add_tail(&chunk->list, &icm->chunk_list); @@ -334,7 +338,7 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_han * been assigned to. */ if (chunk->mem[i].length > offset) { - page = chunk->mem[i].page; + page = sg_page(&chunk->mem[i]); goto out; } offset -= chunk->mem[i].length; diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c index c0b6d19d1457..bcb0885011c8 100644 --- a/drivers/net/ppp_mppe.c +++ b/drivers/net/ppp_mppe.c @@ -55,7 +55,7 @@ #include <linux/mm.h> #include <linux/ppp_defs.h> #include <linux/ppp-comp.h> -#include <asm/scatterlist.h> +#include <linux/scatterlist.h> #include "ppp_mppe.h" @@ -68,9 +68,7 @@ MODULE_VERSION("1.0.2"); static unsigned int setup_sg(struct scatterlist *sg, const void *address, unsigned int length) { - sg[0].page = virt_to_page(address); - sg[0].offset = offset_in_page(address); - sg[0].length = length; + sg_init_one(sg, address, length); return length; } diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 006054a40995..555055650733 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -20,6 +20,9 @@ obj-$(CONFIG_PCI_MSI) += msi.o # Build the Hypertransport interrupt support obj-$(CONFIG_HT_IRQ) += htirq.o +# Build Intel IOMMU support +obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o + # # Some architectures use the generic PCI setup functions # diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c new file mode 100644 index 000000000000..5dfdfdac92e1 --- /dev/null +++ b/drivers/pci/dmar.c @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) Ashok Raj <ashok.raj@intel.com> + * Copyright (C) Shaohua Li <shaohua.li@intel.com> + * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> + * + * This file implements early detection/parsing of DMA Remapping Devices + * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI + * tables. + */ + +#include <linux/pci.h> +#include <linux/dmar.h> + +#undef PREFIX +#define PREFIX "DMAR:" + +/* No locks are needed as DMA remapping hardware unit + * list is constructed at boot time and hotplug of + * these units are not supported by the architecture. + */ +LIST_HEAD(dmar_drhd_units); +LIST_HEAD(dmar_rmrr_units); + +static struct acpi_table_header * __initdata dmar_tbl; + +static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) +{ + /* + * add INCLUDE_ALL at the tail, so scan the list will find it at + * the very end. + */ + if (drhd->include_all) + list_add_tail(&drhd->list, &dmar_drhd_units); + else + list_add(&drhd->list, &dmar_drhd_units); +} + +static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) +{ + list_add(&rmrr->list, &dmar_rmrr_units); +} + +static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, + struct pci_dev **dev, u16 segment) +{ + struct pci_bus *bus; + struct pci_dev *pdev = NULL; + struct acpi_dmar_pci_path *path; + int count; + + bus = pci_find_bus(segment, scope->bus); + path = (struct acpi_dmar_pci_path *)(scope + 1); + count = (scope->length - sizeof(struct acpi_dmar_device_scope)) + / sizeof(struct acpi_dmar_pci_path); + + while (count) { + if (pdev) + pci_dev_put(pdev); + /* + * Some BIOSes list non-exist devices in DMAR table, just + * ignore it + */ + if (!bus) { + printk(KERN_WARNING + PREFIX "Device scope bus [%d] not found\n", + scope->bus); + break; + } + pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn)); + if (!pdev) { + printk(KERN_WARNING PREFIX + "Device scope device [%04x:%02x:%02x.%02x] not found\n", + segment, bus->number, path->dev, path->fn); + break; + } + path ++; + count --; + bus = pdev->subordinate; + } + if (!pdev) { + printk(KERN_WARNING PREFIX + "Device scope device [%04x:%02x:%02x.%02x] not found\n", + segment, scope->bus, path->dev, path->fn); + *dev = NULL; + return 0; + } + if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \ + pdev->subordinate) || (scope->entry_type == \ + ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) { + pci_dev_put(pdev); + printk(KERN_WARNING PREFIX + "Device scope type does not match for %s\n", + pci_name(pdev)); + return -EINVAL; + } + *dev = pdev; + return 0; +} + +static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, + struct pci_dev ***devices, u16 segment) +{ + struct acpi_dmar_device_scope *scope; + void * tmp = start; + int index; + int ret; + + *cnt = 0; + while (start < end) { + scope = start; + if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || + scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) + (*cnt)++; + else + printk(KERN_WARNING PREFIX + "Unsupported device scope\n"); + start += scope->length; + } + if (*cnt == 0) + return 0; + + *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL); + if (!*devices) + return -ENOMEM; + + start = tmp; + index = 0; + while (start < end) { + scope = start; + if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || + scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) { + ret = dmar_parse_one_dev_scope(scope, + &(*devices)[index], segment); + if (ret) { + kfree(*devices); + return ret; + } + index ++; + } + start += scope->length; + } + + return 0; +} + +/** + * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition + * structure which uniquely represent one DMA remapping hardware unit + * present in the platform + */ +static int __init +dmar_parse_one_drhd(struct acpi_dmar_header *header) +{ + struct acpi_dmar_hardware_unit *drhd; + struct dmar_drhd_unit *dmaru; + int ret = 0; + static int include_all; + + dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); + if (!dmaru) + return -ENOMEM; + + drhd = (struct acpi_dmar_hardware_unit *)header; + dmaru->reg_base_addr = drhd->address; + dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ + + if (!dmaru->include_all) + ret = dmar_parse_dev_scope((void *)(drhd + 1), + ((void *)drhd) + header->length, + &dmaru->devices_cnt, &dmaru->devices, + drhd->segment); + else { + /* Only allow one INCLUDE_ALL */ + if (include_all) { + printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL " + "device scope is allowed\n"); + ret = -EINVAL; + } + include_all = 1; + } + + if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) + kfree(dmaru); + else + dmar_register_drhd_unit(dmaru); + return ret; +} + +static int __init +dmar_parse_one_rmrr(struct acpi_dmar_header *header) +{ + struct acpi_dmar_reserved_memory *rmrr; + struct dmar_rmrr_unit *rmrru; + int ret = 0; + + rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); + if (!rmrru) + return -ENOMEM; + + rmrr = (struct acpi_dmar_reserved_memory *)header; + rmrru->base_address = rmrr->base_address; + rmrru->end_address = rmrr->end_address; + ret = dmar_parse_dev_scope((void *)(rmrr + 1), + ((void *)rmrr) + header->length, + &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); + + if (ret || (rmrru->devices_cnt == 0)) + kfree(rmrru); + else + dmar_register_rmrr_unit(rmrru); + return ret; +} + +static void __init +dmar_table_print_dmar_entry(struct acpi_dmar_header *header) +{ + struct acpi_dmar_hardware_unit *drhd; + struct acpi_dmar_reserved_memory *rmrr; + + switch (header->type) { + case ACPI_DMAR_TYPE_HARDWARE_UNIT: + drhd = (struct acpi_dmar_hardware_unit *)header; + printk (KERN_INFO PREFIX + "DRHD (flags: 0x%08x)base: 0x%016Lx\n", + drhd->flags, drhd->address); + break; + case ACPI_DMAR_TYPE_RESERVED_MEMORY: + rmrr = (struct acpi_dmar_reserved_memory *)header; + + printk (KERN_INFO PREFIX + "RMRR base: 0x%016Lx end: 0x%016Lx\n", + rmrr->base_address, rmrr->end_address); + break; + } +} + +/** + * parse_dmar_table - parses the DMA reporting table + */ +static int __init +parse_dmar_table(void) +{ + struct acpi_table_dmar *dmar; + struct acpi_dmar_header *entry_header; + int ret = 0; + + dmar = (struct acpi_table_dmar *)dmar_tbl; + if (!dmar) + return -ENODEV; + + if (!dmar->width) { + printk (KERN_WARNING PREFIX "Zero: Invalid DMAR haw\n"); + return -EINVAL; + } + + printk (KERN_INFO PREFIX "Host address width %d\n", + dmar->width + 1); + + entry_header = (struct acpi_dmar_header *)(dmar + 1); + while (((unsigned long)entry_header) < + (((unsigned long)dmar) + dmar_tbl->length)) { + dmar_table_print_dmar_entry(entry_header); + + switch (entry_header->type) { + case ACPI_DMAR_TYPE_HARDWARE_UNIT: + ret = dmar_parse_one_drhd(entry_header); + break; + case ACPI_DMAR_TYPE_RESERVED_MEMORY: + ret = dmar_parse_one_rmrr(entry_header); + break; + default: + printk(KERN_WARNING PREFIX + "Unknown DMAR structure type\n"); + ret = 0; /* for forward compatibility */ + break; + } + if (ret) + break; + + entry_header = ((void *)entry_header + entry_header->length); + } + return ret; +} + + +int __init dmar_table_init(void) +{ + + parse_dmar_table(); + if (list_empty(&dmar_drhd_units)) { + printk(KERN_INFO PREFIX "No DMAR devices found\n"); + return -ENODEV; + } + return 0; +} + +/** + * early_dmar_detect - checks to see if the platform supports DMAR devices + */ +int __init early_dmar_detect(void) +{ + acpi_status status = AE_OK; + + /* if we could find DMAR table, then there are DMAR devices */ + status = acpi_get_table(ACPI_SIG_DMAR, 0, + (struct acpi_table_header **)&dmar_tbl); + + if (ACPI_SUCCESS(status) && !dmar_tbl) { + printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); + status = AE_NOT_FOUND; + } + + return (ACPI_SUCCESS(status) ? 1 : 0); +} diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c new file mode 100644 index 000000000000..b3d70310af49 --- /dev/null +++ b/drivers/pci/intel-iommu.c @@ -0,0 +1,2271 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) Ashok Raj <ashok.raj@intel.com> + * Copyright (C) Shaohua Li <shaohua.li@intel.com> + * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> + */ + +#include <linux/init.h> +#include <linux/bitmap.h> +#include <linux/slab.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/sysdev.h> +#include <linux/spinlock.h> +#include <linux/pci.h> +#include <linux/dmar.h> +#include <linux/dma-mapping.h> +#include <linux/mempool.h> +#include "iova.h" +#include "intel-iommu.h" +#include <asm/proto.h> /* force_iommu in this header in x86-64*/ +#include <asm/cacheflush.h> +#include <asm/iommu.h> +#include "pci.h" + +#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) +#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) + +#define IOAPIC_RANGE_START (0xfee00000) +#define IOAPIC_RANGE_END (0xfeefffff) +#define IOVA_START_ADDR (0x1000) + +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 + +#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */ + +#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) + +static void domain_remove_dev_info(struct dmar_domain *domain); + +static int dmar_disabled; +static int __initdata dmar_map_gfx = 1; +static int dmar_forcedac; + +#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) +static DEFINE_SPINLOCK(device_domain_lock); +static LIST_HEAD(device_domain_list); + +static int __init intel_iommu_setup(char *str) +{ + if (!str) + return -EINVAL; + while (*str) { + if (!strncmp(str, "off", 3)) { + dmar_disabled = 1; + printk(KERN_INFO"Intel-IOMMU: disabled\n"); + } else if (!strncmp(str, "igfx_off", 8)) { + dmar_map_gfx = 0; + printk(KERN_INFO + "Intel-IOMMU: disable GFX device mapping\n"); + } else if (!strncmp(str, "forcedac", 8)) { + printk (KERN_INFO + "Intel-IOMMU: Forcing DAC for PCI devices\n"); + dmar_forcedac = 1; + } + + str += strcspn(str, ","); + while (*str == ',') + str++; + } + return 0; +} +__setup("intel_iommu=", intel_iommu_setup); + +static struct kmem_cache *iommu_domain_cache; +static struct kmem_cache *iommu_devinfo_cache; +static struct kmem_cache *iommu_iova_cache; + +static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep) +{ + unsigned int flags; + void *vaddr; + + /* trying to avoid low memory issues */ + flags = current->flags & PF_MEMALLOC; + current->flags |= PF_MEMALLOC; + vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC); + current->flags &= (~PF_MEMALLOC | flags); + return vaddr; +} + + +static inline void *alloc_pgtable_page(void) +{ + unsigned int flags; + void *vaddr; + + /* trying to avoid low memory issues */ + flags = current->flags & PF_MEMALLOC; + current->flags |= PF_MEMALLOC; + vaddr = (void *)get_zeroed_page(GFP_ATOMIC); + current->flags &= (~PF_MEMALLOC | flags); + return vaddr; +} + +static inline void free_pgtable_page(void *vaddr) +{ + free_page((unsigned long)vaddr); +} + +static inline void *alloc_domain_mem(void) +{ + return iommu_kmem_cache_alloc(iommu_domain_cache); +} + +static inline void free_domain_mem(void *vaddr) +{ + kmem_cache_free(iommu_domain_cache, vaddr); +} + +static inline void * alloc_devinfo_mem(void) +{ + return iommu_kmem_cache_alloc(iommu_devinfo_cache); +} + +static inline void free_devinfo_mem(void *vaddr) +{ + kmem_cache_free(iommu_devinfo_cache, vaddr); +} + +struct iova *alloc_iova_mem(void) +{ + return iommu_kmem_cache_alloc(iommu_iova_cache); +} + +void free_iova_mem(struct iova *iova) +{ + kmem_cache_free(iommu_iova_cache, iova); +} + +static inline void __iommu_flush_cache( + struct intel_iommu *iommu, void *addr, int size) +{ + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(addr, size); +} + +/* Gets context entry for a given bus and devfn */ +static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, + u8 bus, u8 devfn) +{ + struct root_entry *root; + struct context_entry *context; + unsigned long phy_addr; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + root = &iommu->root_entry[bus]; + context = get_context_addr_from_root(root); + if (!context) { + context = (struct context_entry *)alloc_pgtable_page(); + if (!context) { + spin_unlock_irqrestore(&iommu->lock, flags); + return NULL; + } + __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K); + phy_addr = virt_to_phys((void *)context); + set_root_value(root, phy_addr); + set_root_present(root); + __iommu_flush_cache(iommu, root, sizeof(*root)); + } + spin_unlock_irqrestore(&iommu->lock, flags); + return &context[devfn]; +} + +static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + struct root_entry *root; + struct context_entry *context; + int ret; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + root = &iommu->root_entry[bus]; + context = get_context_addr_from_root(root); + if (!context) { + ret = 0; + goto out; + } + ret = context_present(context[devfn]); +out: + spin_unlock_irqrestore(&iommu->lock, flags); + return ret; +} + +static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) +{ + struct root_entry *root; + struct context_entry *context; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + root = &iommu->root_entry[bus]; + context = get_context_addr_from_root(root); + if (context) { + context_clear_entry(context[devfn]); + __iommu_flush_cache(iommu, &context[devfn], \ + sizeof(*context)); + } + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static void free_context_table(struct intel_iommu *iommu) +{ + struct root_entry *root; + int i; + unsigned long flags; + struct context_entry *context; + + spin_lock_irqsave(&iommu->lock, flags); + if (!iommu->root_entry) { + goto out; + } + for (i = 0; i < ROOT_ENTRY_NR; i++) { + root = &iommu->root_entry[i]; + context = get_context_addr_from_root(root); + if (context) + free_pgtable_page(context); + } + free_pgtable_page(iommu->root_entry); + iommu->root_entry = NULL; +out: + spin_unlock_irqrestore(&iommu->lock, flags); +} + +/* page table handling */ +#define LEVEL_STRIDE (9) +#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) + +static inline int agaw_to_level(int agaw) +{ + return agaw + 2; +} + +static inline int agaw_to_width(int agaw) +{ + return 30 + agaw * LEVEL_STRIDE; + +} + +static inline int width_to_agaw(int width) +{ + return (width - 30) / LEVEL_STRIDE; +} + +static inline unsigned int level_to_offset_bits(int level) +{ + return (12 + (level - 1) * LEVEL_STRIDE); +} + +static inline int address_level_offset(u64 addr, int level) +{ + return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); +} + +static inline u64 level_mask(int level) +{ + return ((u64)-1 << level_to_offset_bits(level)); +} + +static inline u64 level_size(int level) +{ + return ((u64)1 << level_to_offset_bits(level)); +} + +static inline u64 align_to_level(u64 addr, int level) +{ + return ((addr + level_size(level) - 1) & level_mask(level)); +} + +static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) +{ + int addr_width = agaw_to_width(domain->agaw); + struct dma_pte *parent, *pte = NULL; + int level = agaw_to_level(domain->agaw); + int offset; + unsigned long flags; + + BUG_ON(!domain->pgd); + + addr &= (((u64)1) << addr_width) - 1; + parent = domain->pgd; + + spin_lock_irqsave(&domain->mapping_lock, flags); + while (level > 0) { + void *tmp_page; + + offset = address_level_offset(addr, level); + pte = &parent[offset]; + if (level == 1) + break; + + if (!dma_pte_present(*pte)) { + tmp_page = alloc_pgtable_page(); + + if (!tmp_page) { + spin_unlock_irqrestore(&domain->mapping_lock, + flags); + return NULL; + } + __iommu_flush_cache(domain->iommu, tmp_page, + PAGE_SIZE_4K); + dma_set_pte_addr(*pte, virt_to_phys(tmp_page)); + /* + * high level table always sets r/w, last level page + * table control read/write + */ + dma_set_pte_readable(*pte); + dma_set_pte_writable(*pte); + __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); + } + parent = phys_to_virt(dma_pte_addr(*pte)); + level--; + } + + spin_unlock_irqrestore(&domain->mapping_lock, flags); + return pte; +} + +/* return address's pte at specific level */ +static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, + int level) +{ + struct dma_pte *parent, *pte = NULL; + int total = agaw_to_level(domain->agaw); + int offset; + + parent = domain->pgd; + while (level <= total) { + offset = address_level_offset(addr, total); + pte = &parent[offset]; + if (level == total) + return pte; + + if (!dma_pte_present(*pte)) + break; + parent = phys_to_virt(dma_pte_addr(*pte)); + total--; + } + return NULL; +} + +/* clear one page's page table */ +static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) +{ + struct dma_pte *pte = NULL; + + /* get last level pte */ + pte = dma_addr_level_pte(domain, addr, 1); + + if (pte) { + dma_clear_pte(*pte); + __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); + } +} + +/* clear last level pte, a tlb flush should be followed */ +static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) +{ + int addr_width = agaw_to_width(domain->agaw); + + start &= (((u64)1) << addr_width) - 1; + end &= (((u64)1) << addr_width) - 1; + /* in case it's partial page */ + start = PAGE_ALIGN_4K(start); + end &= PAGE_MASK_4K; + + /* we don't need lock here, nobody else touches the iova range */ + while (start < end) { + dma_pte_clear_one(domain, start); + start += PAGE_SIZE_4K; + } +} + +/* free page table pages. last level pte should already be cleared */ +static void dma_pte_free_pagetable(struct dmar_domain *domain, + u64 start, u64 end) +{ + int addr_width = agaw_to_width(domain->agaw); + struct dma_pte *pte; + int total = agaw_to_level(domain->agaw); + int level; + u64 tmp; + + start &= (((u64)1) << addr_width) - 1; + end &= (((u64)1) << addr_width) - 1; + + /* we don't need lock here, nobody else touches the iova range */ + level = 2; + while (level <= total) { + tmp = align_to_level(start, level); + if (tmp >= end || (tmp + level_size(level) > end)) + return; + + while (tmp < end) { + pte = dma_addr_level_pte(domain, tmp, level); + if (pte) { + free_pgtable_page( + phys_to_virt(dma_pte_addr(*pte))); + dma_clear_pte(*pte); + __iommu_flush_cache(domain->iommu, + pte, sizeof(*pte)); + } + tmp += level_size(level); + } + level++; + } + /* free pgd */ + if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { + free_pgtable_page(domain->pgd); + domain->pgd = NULL; + } +} + +/* iommu handling */ +static int iommu_alloc_root_entry(struct intel_iommu *iommu) +{ + struct root_entry *root; + unsigned long flags; + + root = (struct root_entry *)alloc_pgtable_page(); + if (!root) + return -ENOMEM; + + __iommu_flush_cache(iommu, root, PAGE_SIZE_4K); + + spin_lock_irqsave(&iommu->lock, flags); + iommu->root_entry = root; + spin_unlock_irqrestore(&iommu->lock, flags); + + return 0; +} + +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ +{\ + unsigned long start_time = jiffies;\ + while (1) {\ + sts = op (iommu->reg + offset);\ + if (cond)\ + break;\ + if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\ + panic("DMAR hardware is malfunctioning\n");\ + cpu_relax();\ + }\ +} + +static void iommu_set_root_entry(struct intel_iommu *iommu) +{ + void *addr; + u32 cmd, sts; + unsigned long flag; + + addr = iommu->root_entry; + + spin_lock_irqsave(&iommu->register_lock, flag); + dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); + + cmd = iommu->gcmd | DMA_GCMD_SRTP; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_RTPS), sts); + + spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +static void iommu_flush_write_buffer(struct intel_iommu *iommu) +{ + u32 val; + unsigned long flag; + + if (!cap_rwbf(iommu->cap)) + return; + val = iommu->gcmd | DMA_GCMD_WBF; + + spin_lock_irqsave(&iommu->register_lock, flag); + writel(val, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (!(val & DMA_GSTS_WBFS)), val); + + spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +/* return value determine if we need a write buffer flush */ +static int __iommu_flush_context(struct intel_iommu *iommu, + u16 did, u16 source_id, u8 function_mask, u64 type, + int non_present_entry_flush) +{ + u64 val = 0; + unsigned long flag; + + /* + * In the non-present entry flush case, if hardware doesn't cache + * non-present entry we do nothing and if hardware cache non-present + * entry, we flush entries of domain 0 (the domain id is used to cache + * any non-present entries) + */ + if (non_present_entry_flush) { + if (!cap_caching_mode(iommu->cap)) + return 1; + else + did = 0; + } + + switch (type) { + case DMA_CCMD_GLOBAL_INVL: + val = DMA_CCMD_GLOBAL_INVL; + break; + case DMA_CCMD_DOMAIN_INVL: + val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did); + break; + case DMA_CCMD_DEVICE_INVL: + val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) + | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); + break; + default: + BUG(); + } + val |= DMA_CCMD_ICC; + + spin_lock_irqsave(&iommu->register_lock, flag); + dmar_writeq(iommu->reg + DMAR_CCMD_REG, val); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, + dmar_readq, (!(val & DMA_CCMD_ICC)), val); + + spin_unlock_irqrestore(&iommu->register_lock, flag); + + /* flush context entry will implictly flush write buffer */ + return 0; +} + +static int inline iommu_flush_context_global(struct intel_iommu *iommu, + int non_present_entry_flush) +{ + return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, + non_present_entry_flush); +} + +static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did, + int non_present_entry_flush) +{ + return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL, + non_present_entry_flush); +} + +static int inline iommu_flush_context_device(struct intel_iommu *iommu, + u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush) +{ + return __iommu_flush_context(iommu, did, source_id, function_mask, + DMA_CCMD_DEVICE_INVL, non_present_entry_flush); +} + +/* return value determine if we need a write buffer flush */ +static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, + u64 addr, unsigned int size_order, u64 type, + int non_present_entry_flush) +{ + int tlb_offset = ecap_iotlb_offset(iommu->ecap); + u64 val = 0, val_iva = 0; + unsigned long flag; + + /* + * In the non-present entry flush case, if hardware doesn't cache + * non-present entry we do nothing and if hardware cache non-present + * entry, we flush entries of domain 0 (the domain id is used to cache + * any non-present entries) + */ + if (non_present_entry_flush) { + if (!cap_caching_mode(iommu->cap)) + return 1; + else + did = 0; + } + + switch (type) { + case DMA_TLB_GLOBAL_FLUSH: + /* global flush doesn't need set IVA_REG */ + val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; + break; + case DMA_TLB_DSI_FLUSH: + val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); + break; + case DMA_TLB_PSI_FLUSH: + val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); + /* Note: always flush non-leaf currently */ + val_iva = size_order | addr; + break; + default: + BUG(); + } + /* Note: set drain read/write */ +#if 0 + /* + * This is probably to be super secure.. Looks like we can + * ignore it without any impact. + */ + if (cap_read_drain(iommu->cap)) + val |= DMA_TLB_READ_DRAIN; +#endif + if (cap_write_drain(iommu->cap)) + val |= DMA_TLB_WRITE_DRAIN; + + spin_lock_irqsave(&iommu->register_lock, flag); + /* Note: Only uses first TLB reg currently */ + if (val_iva) + dmar_writeq(iommu->reg + tlb_offset, val_iva); + dmar_writeq(iommu->reg + tlb_offset + 8, val); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, tlb_offset + 8, + dmar_readq, (!(val & DMA_TLB_IVT)), val); + + spin_unlock_irqrestore(&iommu->register_lock, flag); + + /* check IOTLB invalidation granularity */ + if (DMA_TLB_IAIG(val) == 0) + printk(KERN_ERR"IOMMU: flush IOTLB failed\n"); + if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) + pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", + DMA_TLB_IIRG(type), DMA_TLB_IAIG(val)); + /* flush context entry will implictly flush write buffer */ + return 0; +} + +static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu, + int non_present_entry_flush) +{ + return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, + non_present_entry_flush); +} + +static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did, + int non_present_entry_flush) +{ + return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, + non_present_entry_flush); +} + +static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, + u64 addr, unsigned int pages, int non_present_entry_flush) +{ + unsigned int mask; + + BUG_ON(addr & (~PAGE_MASK_4K)); + BUG_ON(pages == 0); + + /* Fallback to domain selective flush if no PSI support */ + if (!cap_pgsel_inv(iommu->cap)) + return iommu_flush_iotlb_dsi(iommu, did, + non_present_entry_flush); + + /* + * PSI requires page size to be 2 ^ x, and the base address is naturally + * aligned to the size + */ + mask = ilog2(__roundup_pow_of_two(pages)); + /* Fallback to domain selective flush if size is too big */ + if (mask > cap_max_amask_val(iommu->cap)) + return iommu_flush_iotlb_dsi(iommu, did, + non_present_entry_flush); + + return __iommu_flush_iotlb(iommu, did, addr, mask, + DMA_TLB_PSI_FLUSH, non_present_entry_flush); +} + +static int iommu_enable_translation(struct intel_iommu *iommu) +{ + u32 sts; + unsigned long flags; + + spin_lock_irqsave(&iommu->register_lock, flags); + writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_TES), sts); + + iommu->gcmd |= DMA_GCMD_TE; + spin_unlock_irqrestore(&iommu->register_lock, flags); + return 0; +} + +static int iommu_disable_translation(struct intel_iommu *iommu) +{ + u32 sts; + unsigned long flag; + + spin_lock_irqsave(&iommu->register_lock, flag); + iommu->gcmd &= ~DMA_GCMD_TE; + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (!(sts & DMA_GSTS_TES)), sts); + + spin_unlock_irqrestore(&iommu->register_lock, flag); + return 0; +} + +/* iommu interrupt handling. Most stuff are MSI-like. */ + +static char *fault_reason_strings[] = +{ + "Software", + "Present bit in root entry is clear", + "Present bit in context entry is clear", + "Invalid context entry", + "Access beyond MGAW", + "PTE Write access is not set", + "PTE Read access is not set", + "Next page table ptr is invalid", + "Root table address invalid", + "Context table ptr is invalid", + "non-zero reserved fields in RTP", + "non-zero reserved fields in CTP", + "non-zero reserved fields in PTE", + "Unknown" +}; +#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) + +char *dmar_get_fault_reason(u8 fault_reason) +{ + if (fault_reason > MAX_FAULT_REASON_IDX) + return fault_reason_strings[MAX_FAULT_REASON_IDX]; + else + return fault_reason_strings[fault_reason]; +} + +void dmar_msi_unmask(unsigned int irq) +{ + struct intel_iommu *iommu = get_irq_data(irq); + unsigned long flag; + + /* unmask it */ + spin_lock_irqsave(&iommu->register_lock, flag); + writel(0, iommu->reg + DMAR_FECTL_REG); + /* Read a reg to force flush the post write */ + readl(iommu->reg + DMAR_FECTL_REG); + spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +void dmar_msi_mask(unsigned int irq) +{ + unsigned long flag; + struct intel_iommu *iommu = get_irq_data(irq); + + /* mask it */ + spin_lock_irqsave(&iommu->register_lock, flag); + writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); + /* Read a reg to force flush the post write */ + readl(iommu->reg + DMAR_FECTL_REG); + spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +void dmar_msi_write(int irq, struct msi_msg *msg) +{ + struct intel_iommu *iommu = get_irq_data(irq); + unsigned long flag; + + spin_lock_irqsave(&iommu->register_lock, flag); + writel(msg->data, iommu->reg + DMAR_FEDATA_REG); + writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); + writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); + spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +void dmar_msi_read(int irq, struct msi_msg *msg) +{ + struct intel_iommu *iommu = get_irq_data(irq); + unsigned long flag; + + spin_lock_irqsave(&iommu->register_lock, flag); + msg->data = readl(iommu->reg + DMAR_FEDATA_REG); + msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); + msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); + spin_unlock_irqrestore(&iommu->register_lock, flag); +} + +static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type, + u8 fault_reason, u16 source_id, u64 addr) +{ + char *reason; + + reason = dmar_get_fault_reason(fault_reason); + + printk(KERN_ERR + "DMAR:[%s] Request device [%02x:%02x.%d] " + "fault addr %llx \n" + "DMAR:[fault reason %02d] %s\n", + (type ? "DMA Read" : "DMA Write"), + (source_id >> 8), PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); + return 0; +} + +#define PRIMARY_FAULT_REG_LEN (16) +static irqreturn_t iommu_page_fault(int irq, void *dev_id) +{ + struct intel_iommu *iommu = dev_id; + int reg, fault_index; + u32 fault_status; + unsigned long flag; + + spin_lock_irqsave(&iommu->register_lock, flag); + fault_status = readl(iommu->reg + DMAR_FSTS_REG); + + /* TBD: ignore advanced fault log currently */ + if (!(fault_status & DMA_FSTS_PPF)) + goto clear_overflow; + + fault_index = dma_fsts_fault_record_index(fault_status); + reg = cap_fault_reg_offset(iommu->cap); + while (1) { + u8 fault_reason; + u16 source_id; + u64 guest_addr; + int type; + u32 data; + + /* highest 32 bits */ + data = readl(iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN + 12); + if (!(data & DMA_FRCD_F)) + break; + + fault_reason = dma_frcd_fault_reason(data); + type = dma_frcd_type(data); + + data = readl(iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN + 8); + source_id = dma_frcd_source_id(data); + + guest_addr = dmar_readq(iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN); + guest_addr = dma_frcd_page_addr(guest_addr); + /* clear the fault */ + writel(DMA_FRCD_F, iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN + 12); + + spin_unlock_irqrestore(&iommu->register_lock, flag); + + iommu_page_fault_do_one(iommu, type, fault_reason, + source_id, guest_addr); + + fault_index++; + if (fault_index > cap_num_fault_regs(iommu->cap)) + fault_index = 0; + spin_lock_irqsave(&iommu->register_lock, flag); + } +clear_overflow: + /* clear primary fault overflow */ + fault_status = readl(iommu->reg + DMAR_FSTS_REG); + if (fault_status & DMA_FSTS_PFO) + writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG); + + spin_unlock_irqrestore(&iommu->register_lock, flag); + return IRQ_HANDLED; +} + +int dmar_set_interrupt(struct intel_iommu *iommu) +{ + int irq, ret; + + irq = create_irq(); + if (!irq) { + printk(KERN_ERR "IOMMU: no free vectors\n"); + return -EINVAL; + } + + set_irq_data(irq, iommu); + iommu->irq = irq; + + ret = arch_setup_dmar_msi(irq); + if (ret) { + set_irq_data(irq, NULL); + iommu->irq = 0; + destroy_irq(irq); + return 0; + } + + /* Force fault register is cleared */ + iommu_page_fault(irq, iommu); + + ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu); + if (ret) + printk(KERN_ERR "IOMMU: can't request irq\n"); + return ret; +} + +static int iommu_init_domains(struct intel_iommu *iommu) +{ + unsigned long ndomains; + unsigned long nlongs; + + ndomains = cap_ndoms(iommu->cap); + pr_debug("Number of Domains supportd <%ld>\n", ndomains); + nlongs = BITS_TO_LONGS(ndomains); + + /* TBD: there might be 64K domains, + * consider other allocation for future chip + */ + iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL); + if (!iommu->domain_ids) { + printk(KERN_ERR "Allocating domain id array failed\n"); + return -ENOMEM; + } + iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *), + GFP_KERNEL); + if (!iommu->domains) { + printk(KERN_ERR "Allocating domain array failed\n"); + kfree(iommu->domain_ids); + return -ENOMEM; + } + + /* + * if Caching mode is set, then invalid translations are tagged + * with domainid 0. Hence we need to pre-allocate it. + */ + if (cap_caching_mode(iommu->cap)) + set_bit(0, iommu->domain_ids); + return 0; +} + +static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) +{ + struct intel_iommu *iommu; + int ret; + int map_size; + u32 ver; + + iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); + if (!iommu) + return NULL; + iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); + if (!iommu->reg) { + printk(KERN_ERR "IOMMU: can't map the region\n"); + goto error; + } + iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); + iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + + /* the registers might be more than one page */ + map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), + cap_max_fault_reg_offset(iommu->cap)); + map_size = PAGE_ALIGN_4K(map_size); + if (map_size > PAGE_SIZE_4K) { + iounmap(iommu->reg); + iommu->reg = ioremap(drhd->reg_base_addr, map_size); + if (!iommu->reg) { + printk(KERN_ERR "IOMMU: can't map the region\n"); + goto error; + } + } + + ver = readl(iommu->reg + DMAR_VER_REG); + pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", + drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), + iommu->cap, iommu->ecap); + ret = iommu_init_domains(iommu); + if (ret) + goto error_unmap; + spin_lock_init(&iommu->lock); + spin_lock_init(&iommu->register_lock); + + drhd->iommu = iommu; + return iommu; +error_unmap: + iounmap(iommu->reg); + iommu->reg = 0; +error: + kfree(iommu); + return NULL; +} + +static void domain_exit(struct dmar_domain *domain); +static void free_iommu(struct intel_iommu *iommu) +{ + struct dmar_domain *domain; + int i; + + if (!iommu) + return; + + i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); + for (; i < cap_ndoms(iommu->cap); ) { + domain = iommu->domains[i]; + clear_bit(i, iommu->domain_ids); + domain_exit(domain); + i = find_next_bit(iommu->domain_ids, + cap_ndoms(iommu->cap), i+1); + } + + if (iommu->gcmd & DMA_GCMD_TE) + iommu_disable_translation(iommu); + + if (iommu->irq) { + set_irq_data(iommu->irq, NULL); + /* This will mask the irq */ + free_irq(iommu->irq, iommu); + destroy_irq(iommu->irq); + } + + kfree(iommu->domains); + kfree(iommu->domain_ids); + + /* free context mapping */ + free_context_table(iommu); + + if (iommu->reg) + iounmap(iommu->reg); + kfree(iommu); +} + +static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) +{ + unsigned long num; + unsigned long ndomains; + struct dmar_domain *domain; + unsigned long flags; + + domain = alloc_domain_mem(); + if (!domain) + return NULL; + + ndomains = cap_ndoms(iommu->cap); + + spin_lock_irqsave(&iommu->lock, flags); + num = find_first_zero_bit(iommu->domain_ids, ndomains); + if (num >= ndomains) { + spin_unlock_irqrestore(&iommu->lock, flags); + free_domain_mem(domain); + printk(KERN_ERR "IOMMU: no free domain ids\n"); + return NULL; + } + + set_bit(num, iommu->domain_ids); + domain->id = num; + domain->iommu = iommu; + iommu->domains[num] = domain; + spin_unlock_irqrestore(&iommu->lock, flags); + + return domain; +} + +static void iommu_free_domain(struct dmar_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&domain->iommu->lock, flags); + clear_bit(domain->id, domain->iommu->domain_ids); + spin_unlock_irqrestore(&domain->iommu->lock, flags); +} + +static struct iova_domain reserved_iova_list; + +static void dmar_init_reserved_ranges(void) +{ + struct pci_dev *pdev = NULL; + struct iova *iova; + int i; + u64 addr, size; + + init_iova_domain(&reserved_iova_list); + + /* IOAPIC ranges shouldn't be accessed by DMA */ + iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START), + IOVA_PFN(IOAPIC_RANGE_END)); + if (!iova) + printk(KERN_ERR "Reserve IOAPIC range failed\n"); + + /* Reserve all PCI MMIO to avoid peer-to-peer access */ + for_each_pci_dev(pdev) { + struct resource *r; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + r = &pdev->resource[i]; + if (!r->flags || !(r->flags & IORESOURCE_MEM)) + continue; + addr = r->start; + addr &= PAGE_MASK_4K; + size = r->end - addr; + size = PAGE_ALIGN_4K(size); + iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr), + IOVA_PFN(size + addr) - 1); + if (!iova) + printk(KERN_ERR "Reserve iova failed\n"); + } + } + +} + +static void domain_reserve_special_ranges(struct dmar_domain *domain) +{ + copy_reserved_iova(&reserved_iova_list, &domain->iovad); +} + +static inline int guestwidth_to_adjustwidth(int gaw) +{ + int agaw; + int r = (gaw - 12) % 9; + + if (r == 0) + agaw = gaw; + else + agaw = gaw + 9 - r; + if (agaw > 64) + agaw = 64; + return agaw; +} + +static int domain_init(struct dmar_domain *domain, int guest_width) +{ + struct intel_iommu *iommu; + int adjust_width, agaw; + unsigned long sagaw; + + init_iova_domain(&domain->iovad); + spin_lock_init(&domain->mapping_lock); + + domain_reserve_special_ranges(domain); + + /* calculate AGAW */ + iommu = domain->iommu; + if (guest_width > cap_mgaw(iommu->cap)) + guest_width = cap_mgaw(iommu->cap); + domain->gaw = guest_width; + adjust_width = guestwidth_to_adjustwidth(guest_width); + agaw = width_to_agaw(adjust_width); + sagaw = cap_sagaw(iommu->cap); + if (!test_bit(agaw, &sagaw)) { + /* hardware doesn't support it, choose a bigger one */ + pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw); + agaw = find_next_bit(&sagaw, 5, agaw); + if (agaw >= 5) + return -ENODEV; + } + domain->agaw = agaw; + INIT_LIST_HEAD(&domain->devices); + + /* always allocate the top pgd */ + domain->pgd = (struct dma_pte *)alloc_pgtable_page(); + if (!domain->pgd) + return -ENOMEM; + __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K); + return 0; +} + +static void domain_exit(struct dmar_domain *domain) +{ + u64 end; + + /* Domain 0 is reserved, so dont process it */ + if (!domain) + return; + + domain_remove_dev_info(domain); + /* destroy iovas */ + put_iova_domain(&domain->iovad); + end = DOMAIN_MAX_ADDR(domain->gaw); + end = end & (~PAGE_MASK_4K); + + /* clear ptes */ + dma_pte_clear_range(domain, 0, end); + + /* free page tables */ + dma_pte_free_pagetable(domain, 0, end); + + iommu_free_domain(domain); + free_domain_mem(domain); +} + +static int domain_context_mapping_one(struct dmar_domain *domain, + u8 bus, u8 devfn) +{ + struct context_entry *context; + struct intel_iommu *iommu = domain->iommu; + unsigned long flags; + + pr_debug("Set context mapping for %02x:%02x.%d\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + BUG_ON(!domain->pgd); + context = device_to_context_entry(iommu, bus, devfn); + if (!context) + return -ENOMEM; + spin_lock_irqsave(&iommu->lock, flags); + if (context_present(*context)) { + spin_unlock_irqrestore(&iommu->lock, flags); + return 0; + } + + context_set_domain_id(*context, domain->id); + context_set_address_width(*context, domain->agaw); + context_set_address_root(*context, virt_to_phys(domain->pgd)); + context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); + context_set_fault_enable(*context); + context_set_present(*context); + __iommu_flush_cache(iommu, context, sizeof(*context)); + + /* it's a non-present to present mapping */ + if (iommu_flush_context_device(iommu, domain->id, + (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1)) + iommu_flush_write_buffer(iommu); + else + iommu_flush_iotlb_dsi(iommu, 0, 0); + spin_unlock_irqrestore(&iommu->lock, flags); + return 0; +} + +static int +domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) +{ + int ret; + struct pci_dev *tmp, *parent; + + ret = domain_context_mapping_one(domain, pdev->bus->number, + pdev->devfn); + if (ret) + return ret; + + /* dependent device mapping */ + tmp = pci_find_upstream_pcie_bridge(pdev); + if (!tmp) + return 0; + /* Secondary interface's bus number and devfn 0 */ + parent = pdev->bus->self; + while (parent != tmp) { + ret = domain_context_mapping_one(domain, parent->bus->number, + parent->devfn); + if (ret) + return ret; + parent = parent->bus->self; + } + if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ + return domain_context_mapping_one(domain, + tmp->subordinate->number, 0); + else /* this is a legacy PCI bridge */ + return domain_context_mapping_one(domain, + tmp->bus->number, tmp->devfn); +} + +static int domain_context_mapped(struct dmar_domain *domain, + struct pci_dev *pdev) +{ + int ret; + struct pci_dev *tmp, *parent; + + ret = device_context_mapped(domain->iommu, + pdev->bus->number, pdev->devfn); + if (!ret) + return ret; + /* dependent device mapping */ + tmp = pci_find_upstream_pcie_bridge(pdev); + if (!tmp) + return ret; + /* Secondary interface's bus number and devfn 0 */ + parent = pdev->bus->self; + while (parent != tmp) { + ret = device_context_mapped(domain->iommu, parent->bus->number, + parent->devfn); + if (!ret) + return ret; + parent = parent->bus->self; + } + if (tmp->is_pcie) + return device_context_mapped(domain->iommu, + tmp->subordinate->number, 0); + else + return device_context_mapped(domain->iommu, + tmp->bus->number, tmp->devfn); +} + +static int +domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot) +{ + u64 start_pfn, end_pfn; + struct dma_pte *pte; + int index; + + if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) + return -EINVAL; + iova &= PAGE_MASK_4K; + start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K; + end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K; + index = 0; + while (start_pfn < end_pfn) { + pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index); + if (!pte) + return -ENOMEM; + /* We don't need lock here, nobody else + * touches the iova range + */ + BUG_ON(dma_pte_addr(*pte)); + dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K); + dma_set_pte_prot(*pte, prot); + __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); + start_pfn++; + index++; + } + return 0; +} + +static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn) +{ + clear_context_table(domain->iommu, bus, devfn); + iommu_flush_context_global(domain->iommu, 0); + iommu_flush_iotlb_global(domain->iommu, 0); +} + +static void domain_remove_dev_info(struct dmar_domain *domain) +{ + struct device_domain_info *info; + unsigned long flags; + + spin_lock_irqsave(&device_domain_lock, flags); + while (!list_empty(&domain->devices)) { + info = list_entry(domain->devices.next, + struct device_domain_info, link); + list_del(&info->link); + list_del(&info->global); + if (info->dev) + info->dev->dev.archdata.iommu = NULL; + spin_unlock_irqrestore(&device_domain_lock, flags); + + detach_domain_for_dev(info->domain, info->bus, info->devfn); + free_devinfo_mem(info); + + spin_lock_irqsave(&device_domain_lock, flags); + } + spin_unlock_irqrestore(&device_domain_lock, flags); +} + +/* + * find_domain + * Note: we use struct pci_dev->dev.archdata.iommu stores the info + */ +struct dmar_domain * +find_domain(struct pci_dev *pdev) +{ + struct device_domain_info *info; + + /* No lock here, assumes no domain exit in normal case */ + info = pdev->dev.archdata.iommu; + if (info) + return info->domain; + return NULL; +} + +static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, + struct pci_dev *dev) +{ + int index; + + while (dev) { + for (index = 0; index < cnt; index ++) + if (dev == devices[index]) + return 1; + + /* Check our parent */ + dev = dev->bus->self; + } + + return 0; +} + +static struct dmar_drhd_unit * +dmar_find_matched_drhd_unit(struct pci_dev *dev) +{ + struct dmar_drhd_unit *drhd = NULL; + + list_for_each_entry(drhd, &dmar_drhd_units, list) { + if (drhd->include_all || dmar_pci_device_match(drhd->devices, + drhd->devices_cnt, dev)) + return drhd; + } + + return NULL; +} + +/* domain is initialized */ +static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) +{ + struct dmar_domain *domain, *found = NULL; + struct intel_iommu *iommu; + struct dmar_drhd_unit *drhd; + struct device_domain_info *info, *tmp; + struct pci_dev *dev_tmp; + unsigned long flags; + int bus = 0, devfn = 0; + + domain = find_domain(pdev); + if (domain) + return domain; + + dev_tmp = pci_find_upstream_pcie_bridge(pdev); + if (dev_tmp) { + if (dev_tmp->is_pcie) { + bus = dev_tmp->subordinate->number; + devfn = 0; + } else { + bus = dev_tmp->bus->number; + devfn = dev_tmp->devfn; + } + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry(info, &device_domain_list, global) { + if (info->bus == bus && info->devfn == devfn) { + found = info->domain; + break; + } + } + spin_unlock_irqrestore(&device_domain_lock, flags); + /* pcie-pci bridge already has a domain, uses it */ + if (found) { + domain = found; + goto found_domain; + } + } + + /* Allocate new domain for the device */ + drhd = dmar_find_matched_drhd_unit(pdev); + if (!drhd) { + printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n", + pci_name(pdev)); + return NULL; + } + iommu = drhd->iommu; + + domain = iommu_alloc_domain(iommu); + if (!domain) + goto error; + + if (domain_init(domain, gaw)) { + domain_exit(domain); + goto error; + } + + /* register pcie-to-pci device */ + if (dev_tmp) { + info = alloc_devinfo_mem(); + if (!info) { + domain_exit(domain); + goto error; + } + info->bus = bus; + info->devfn = devfn; + info->dev = NULL; + info->domain = domain; + /* This domain is shared by devices under p2p bridge */ + domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES; + + /* pcie-to-pci bridge already has a domain, uses it */ + found = NULL; + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry(tmp, &device_domain_list, global) { + if (tmp->bus == bus && tmp->devfn == devfn) { + found = tmp->domain; + break; + } + } + if (found) { + free_devinfo_mem(info); + domain_exit(domain); + domain = found; + } else { + list_add(&info->link, &domain->devices); + list_add(&info->global, &device_domain_list); + } + spin_unlock_irqrestore(&device_domain_lock, flags); + } + +found_domain: + info = alloc_devinfo_mem(); + if (!info) + goto error; + info->bus = pdev->bus->number; + info->devfn = pdev->devfn; + info->dev = pdev; + info->domain = domain; + spin_lock_irqsave(&device_domain_lock, flags); + /* somebody is fast */ + found = find_domain(pdev); + if (found != NULL) { + spin_unlock_irqrestore(&device_domain_lock, flags); + if (found != domain) { + domain_exit(domain); + domain = found; + } + free_devinfo_mem(info); + return domain; + } + list_add(&info->link, &domain->devices); + list_add(&info->global, &device_domain_list); + pdev->dev.archdata.iommu = info; + spin_unlock_irqrestore(&device_domain_lock, flags); + return domain; +error: + /* recheck it here, maybe others set it */ + return find_domain(pdev); +} + +static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end) +{ + struct dmar_domain *domain; + unsigned long size; + u64 base; + int ret; + + printk(KERN_INFO + "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", + pci_name(pdev), start, end); + /* page table init */ + domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + if (!domain) + return -ENOMEM; + + /* The address might not be aligned */ + base = start & PAGE_MASK_4K; + size = end - base; + size = PAGE_ALIGN_4K(size); + if (!reserve_iova(&domain->iovad, IOVA_PFN(base), + IOVA_PFN(base + size) - 1)) { + printk(KERN_ERR "IOMMU: reserve iova failed\n"); + ret = -ENOMEM; + goto error; + } + + pr_debug("Mapping reserved region %lx@%llx for %s\n", + size, base, pci_name(pdev)); + /* + * RMRR range might have overlap with physical memory range, + * clear it first + */ + dma_pte_clear_range(domain, base, base + size); + + ret = domain_page_mapping(domain, base, base, size, + DMA_PTE_READ|DMA_PTE_WRITE); + if (ret) + goto error; + + /* context entry init */ + ret = domain_context_mapping(domain, pdev); + if (!ret) + return 0; +error: + domain_exit(domain); + return ret; + +} + +static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, + struct pci_dev *pdev) +{ + if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) + return 0; + return iommu_prepare_identity_map(pdev, rmrr->base_address, + rmrr->end_address + 1); +} + +#ifdef CONFIG_DMAR_GFX_WA +extern int arch_get_ram_range(int slot, u64 *addr, u64 *size); +static void __init iommu_prepare_gfx_mapping(void) +{ + struct pci_dev *pdev = NULL; + u64 base, size; + int slot; + int ret; + + for_each_pci_dev(pdev) { + if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO || + !IS_GFX_DEVICE(pdev)) + continue; + printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n", + pci_name(pdev)); + slot = arch_get_ram_range(0, &base, &size); + while (slot >= 0) { + ret = iommu_prepare_identity_map(pdev, + base, base + size); + if (ret) + goto error; + slot = arch_get_ram_range(slot, &base, &size); + } + continue; +error: + printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); + } +} +#endif + +#ifdef CONFIG_DMAR_FLOPPY_WA +static inline void iommu_prepare_isa(void) +{ + struct pci_dev *pdev; + int ret; + + pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); + if (!pdev) + return; + + printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); + ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); + + if (ret) + printk("IOMMU: Failed to create 0-64M identity map, " + "floppy might not work\n"); + +} +#else +static inline void iommu_prepare_isa(void) +{ + return; +} +#endif /* !CONFIG_DMAR_FLPY_WA */ + +int __init init_dmars(void) +{ + struct dmar_drhd_unit *drhd; + struct dmar_rmrr_unit *rmrr; + struct pci_dev *pdev; + struct intel_iommu *iommu; + int ret, unit = 0; + + /* + * for each drhd + * allocate root + * initialize and program root entry to not present + * endfor + */ + for_each_drhd_unit(drhd) { + if (drhd->ignored) + continue; + iommu = alloc_iommu(drhd); + if (!iommu) { + ret = -ENOMEM; + goto error; + } + + /* + * TBD: + * we could share the same root & context tables + * amoung all IOMMU's. Need to Split it later. + */ + ret = iommu_alloc_root_entry(iommu); + if (ret) { + printk(KERN_ERR "IOMMU: allocate root entry failed\n"); + goto error; + } + } + + /* + * For each rmrr + * for each dev attached to rmrr + * do + * locate drhd for dev, alloc domain for dev + * allocate free domain + * allocate page table entries for rmrr + * if context not allocated for bus + * allocate and init context + * set present in root table for this bus + * init context with domain, translation etc + * endfor + * endfor + */ + for_each_rmrr_units(rmrr) { + int i; + for (i = 0; i < rmrr->devices_cnt; i++) { + pdev = rmrr->devices[i]; + /* some BIOS lists non-exist devices in DMAR table */ + if (!pdev) + continue; + ret = iommu_prepare_rmrr_dev(rmrr, pdev); + if (ret) + printk(KERN_ERR + "IOMMU: mapping reserved region failed\n"); + } + } + + iommu_prepare_gfx_mapping(); + + iommu_prepare_isa(); + + /* + * for each drhd + * enable fault log + * global invalidate context cache + * global invalidate iotlb + * enable translation + */ + for_each_drhd_unit(drhd) { + if (drhd->ignored) + continue; + iommu = drhd->iommu; + sprintf (iommu->name, "dmar%d", unit++); + + iommu_flush_write_buffer(iommu); + + ret = dmar_set_interrupt(iommu); + if (ret) + goto error; + + iommu_set_root_entry(iommu); + + iommu_flush_context_global(iommu, 0); + iommu_flush_iotlb_global(iommu, 0); + + ret = iommu_enable_translation(iommu); + if (ret) + goto error; + } + + return 0; +error: + for_each_drhd_unit(drhd) { + if (drhd->ignored) + continue; + iommu = drhd->iommu; + free_iommu(iommu); + } + return ret; +} + +static inline u64 aligned_size(u64 host_addr, size_t size) +{ + u64 addr; + addr = (host_addr & (~PAGE_MASK_4K)) + size; + return PAGE_ALIGN_4K(addr); +} + +struct iova * +iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end) +{ + struct iova *piova; + + /* Make sure it's in range */ + end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end); + if (!size || (IOVA_START_ADDR + size > end)) + return NULL; + + piova = alloc_iova(&domain->iovad, + size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1); + return piova; +} + +static struct iova * +__intel_alloc_iova(struct device *dev, struct dmar_domain *domain, + size_t size) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct iova *iova = NULL; + + if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) { + iova = iommu_alloc_iova(domain, size, pdev->dma_mask); + } else { + /* + * First try to allocate an io virtual address in + * DMA_32BIT_MASK and if that fails then try allocating + * from higer range + */ + iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK); + if (!iova) + iova = iommu_alloc_iova(domain, size, pdev->dma_mask); + } + + if (!iova) { + printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); + return NULL; + } + + return iova; +} + +static struct dmar_domain * +get_valid_domain_for_dev(struct pci_dev *pdev) +{ + struct dmar_domain *domain; + int ret; + + domain = get_domain_for_dev(pdev, + DEFAULT_DOMAIN_ADDRESS_WIDTH); + if (!domain) { + printk(KERN_ERR + "Allocating domain for %s failed", pci_name(pdev)); + return 0; + } + + /* make sure context mapping is ok */ + if (unlikely(!domain_context_mapped(domain, pdev))) { + ret = domain_context_mapping(domain, pdev); + if (ret) { + printk(KERN_ERR + "Domain context map for %s failed", + pci_name(pdev)); + return 0; + } + } + + return domain; +} + +static dma_addr_t intel_map_single(struct device *hwdev, void *addr, + size_t size, int dir) +{ + struct pci_dev *pdev = to_pci_dev(hwdev); + int ret; + struct dmar_domain *domain; + unsigned long start_addr; + struct iova *iova; + int prot = 0; + + BUG_ON(dir == DMA_NONE); + if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) + return virt_to_bus(addr); + + domain = get_valid_domain_for_dev(pdev); + if (!domain) + return 0; + + addr = (void *)virt_to_phys(addr); + size = aligned_size((u64)addr, size); + + iova = __intel_alloc_iova(hwdev, domain, size); + if (!iova) + goto error; + + start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + + /* + * Check if DMAR supports zero-length reads on write only + * mappings.. + */ + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ + !cap_zlr(domain->iommu->cap)) + prot |= DMA_PTE_READ; + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + prot |= DMA_PTE_WRITE; + /* + * addr - (addr + size) might be partial page, we should map the whole + * page. Note: if two part of one page are separately mapped, we + * might have two guest_addr mapping to the same host addr, but this + * is not a big problem + */ + ret = domain_page_mapping(domain, start_addr, + ((u64)addr) & PAGE_MASK_4K, size, prot); + if (ret) + goto error; + + pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n", + pci_name(pdev), size, (u64)addr, + size, (u64)start_addr, dir); + + /* it's a non-present to present mapping */ + ret = iommu_flush_iotlb_psi(domain->iommu, domain->id, + start_addr, size >> PAGE_SHIFT_4K, 1); + if (ret) + iommu_flush_write_buffer(domain->iommu); + + return (start_addr + ((u64)addr & (~PAGE_MASK_4K))); + +error: + if (iova) + __free_iova(&domain->iovad, iova); + printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n", + pci_name(pdev), size, (u64)addr, dir); + return 0; +} + +static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, + size_t size, int dir) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct dmar_domain *domain; + unsigned long start_addr; + struct iova *iova; + + if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) + return; + domain = find_domain(pdev); + BUG_ON(!domain); + + iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); + if (!iova) + return; + + start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + size = aligned_size((u64)dev_addr, size); + + pr_debug("Device %s unmapping: %lx@%llx\n", + pci_name(pdev), size, (u64)start_addr); + + /* clear the whole page */ + dma_pte_clear_range(domain, start_addr, start_addr + size); + /* free page tables */ + dma_pte_free_pagetable(domain, start_addr, start_addr + size); + + if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, + size >> PAGE_SHIFT_4K, 0)) + iommu_flush_write_buffer(domain->iommu); + + /* free iova */ + __free_iova(&domain->iovad, iova); +} + +static void * intel_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) +{ + void *vaddr; + int order; + + size = PAGE_ALIGN_4K(size); + order = get_order(size); + flags &= ~(GFP_DMA | GFP_DMA32); + + vaddr = (void *)__get_free_pages(flags, order); + if (!vaddr) + return NULL; + memset(vaddr, 0, size); + + *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL); + if (*dma_handle) + return vaddr; + free_pages((unsigned long)vaddr, order); + return NULL; +} + +static void intel_free_coherent(struct device *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + int order; + + size = PAGE_ALIGN_4K(size); + order = get_order(size); + + intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL); + free_pages((unsigned long)vaddr, order); +} + +#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) +static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, + int nelems, int dir) +{ + int i; + struct pci_dev *pdev = to_pci_dev(hwdev); + struct dmar_domain *domain; + unsigned long start_addr; + struct iova *iova; + size_t size = 0; + void *addr; + struct scatterlist *sg; + + if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) + return; + + domain = find_domain(pdev); + + iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); + if (!iova) + return; + for_each_sg(sglist, sg, nelems, i) { + addr = SG_ENT_VIRT_ADDRESS(sg); + size += aligned_size((u64)addr, sg->length); + } + + start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + + /* clear the whole page */ + dma_pte_clear_range(domain, start_addr, start_addr + size); + /* free page tables */ + dma_pte_free_pagetable(domain, start_addr, start_addr + size); + + if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, + size >> PAGE_SHIFT_4K, 0)) + iommu_flush_write_buffer(domain->iommu); + + /* free iova */ + __free_iova(&domain->iovad, iova); +} + +static int intel_nontranslate_map_sg(struct device *hddev, + struct scatterlist *sglist, int nelems, int dir) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sglist, sg, nelems, i) { + BUG_ON(!sg->page); + sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg)); + sg->dma_length = sg->length; + } + return nelems; +} + +static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, + int nelems, int dir) +{ + void *addr; + int i; + struct pci_dev *pdev = to_pci_dev(hwdev); + struct dmar_domain *domain; + size_t size = 0; + int prot = 0; + size_t offset = 0; + struct iova *iova = NULL; + int ret; + struct scatterlist *sg; + unsigned long start_addr; + + BUG_ON(dir == DMA_NONE); + if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) + return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); + + domain = get_valid_domain_for_dev(pdev); + if (!domain) + return 0; + + for_each_sg(sglist, sg, nelems, i) { + addr = SG_ENT_VIRT_ADDRESS(sg); + addr = (void *)virt_to_phys(addr); + size += aligned_size((u64)addr, sg->length); + } + + iova = __intel_alloc_iova(hwdev, domain, size); + if (!iova) { + sglist->dma_length = 0; + return 0; + } + + /* + * Check if DMAR supports zero-length reads on write only + * mappings.. + */ + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ + !cap_zlr(domain->iommu->cap)) + prot |= DMA_PTE_READ; + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + prot |= DMA_PTE_WRITE; + + start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + offset = 0; + for_each_sg(sglist, sg, nelems, i) { + addr = SG_ENT_VIRT_ADDRESS(sg); + addr = (void *)virt_to_phys(addr); + size = aligned_size((u64)addr, sg->length); + ret = domain_page_mapping(domain, start_addr + offset, + ((u64)addr) & PAGE_MASK_4K, + size, prot); + if (ret) { + /* clear the page */ + dma_pte_clear_range(domain, start_addr, + start_addr + offset); + /* free page tables */ + dma_pte_free_pagetable(domain, start_addr, + start_addr + offset); + /* free iova */ + __free_iova(&domain->iovad, iova); + return 0; + } + sg->dma_address = start_addr + offset + + ((u64)addr & (~PAGE_MASK_4K)); + sg->dma_length = sg->length; + offset += size; + } + + /* it's a non-present to present mapping */ + if (iommu_flush_iotlb_psi(domain->iommu, domain->id, + start_addr, offset >> PAGE_SHIFT_4K, 1)) + iommu_flush_write_buffer(domain->iommu); + return nelems; +} + +static struct dma_mapping_ops intel_dma_ops = { + .alloc_coherent = intel_alloc_coherent, + .free_coherent = intel_free_coherent, + .map_single = intel_map_single, + .unmap_single = intel_unmap_single, + .map_sg = intel_map_sg, + .unmap_sg = intel_unmap_sg, +}; + +static inline int iommu_domain_cache_init(void) +{ + int ret = 0; + + iommu_domain_cache = kmem_cache_create("iommu_domain", + sizeof(struct dmar_domain), + 0, + SLAB_HWCACHE_ALIGN, + + NULL); + if (!iommu_domain_cache) { + printk(KERN_ERR "Couldn't create iommu_domain cache\n"); + ret = -ENOMEM; + } + + return ret; +} + +static inline int iommu_devinfo_cache_init(void) +{ + int ret = 0; + + iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", + sizeof(struct device_domain_info), + 0, + SLAB_HWCACHE_ALIGN, + + NULL); + if (!iommu_devinfo_cache) { + printk(KERN_ERR "Couldn't create devinfo cache\n"); + ret = -ENOMEM; + } + + return ret; +} + +static inline int iommu_iova_cache_init(void) +{ + int ret = 0; + + iommu_iova_cache = kmem_cache_create("iommu_iova", + sizeof(struct iova), + 0, + SLAB_HWCACHE_ALIGN, + + NULL); + if (!iommu_iova_cache) { + printk(KERN_ERR "Couldn't create iova cache\n"); + ret = -ENOMEM; + } + + return ret; +} + +static int __init iommu_init_mempool(void) +{ + int ret; + ret = iommu_iova_cache_init(); + if (ret) + return ret; + + ret = iommu_domain_cache_init(); + if (ret) + goto domain_error; + + ret = iommu_devinfo_cache_init(); + if (!ret) + return ret; + + kmem_cache_destroy(iommu_domain_cache); +domain_error: + kmem_cache_destroy(iommu_iova_cache); + + return -ENOMEM; +} + +static void __init iommu_exit_mempool(void) +{ + kmem_cache_destroy(iommu_devinfo_cache); + kmem_cache_destroy(iommu_domain_cache); + kmem_cache_destroy(iommu_iova_cache); + +} + +void __init detect_intel_iommu(void) +{ + if (swiotlb || no_iommu || iommu_detected || dmar_disabled) + return; + if (early_dmar_detect()) { + iommu_detected = 1; + } +} + +static void __init init_no_remapping_devices(void) +{ + struct dmar_drhd_unit *drhd; + + for_each_drhd_unit(drhd) { + if (!drhd->include_all) { + int i; + for (i = 0; i < drhd->devices_cnt; i++) + if (drhd->devices[i] != NULL) + break; + /* ignore DMAR unit if no pci devices exist */ + if (i == drhd->devices_cnt) + drhd->ignored = 1; + } + } + + if (dmar_map_gfx) + return; + + for_each_drhd_unit(drhd) { + int i; + if (drhd->ignored || drhd->include_all) + continue; + + for (i = 0; i < drhd->devices_cnt; i++) + if (drhd->devices[i] && + !IS_GFX_DEVICE(drhd->devices[i])) + break; + + if (i < drhd->devices_cnt) + continue; + + /* bypass IOMMU if it is just for gfx devices */ + drhd->ignored = 1; + for (i = 0; i < drhd->devices_cnt; i++) { + if (!drhd->devices[i]) + continue; + drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + } + } +} + +int __init intel_iommu_init(void) +{ + int ret = 0; + + if (no_iommu || swiotlb || dmar_disabled) + return -ENODEV; + + if (dmar_table_init()) + return -ENODEV; + + iommu_init_mempool(); + dmar_init_reserved_ranges(); + + init_no_remapping_devices(); + + ret = init_dmars(); + if (ret) { + printk(KERN_ERR "IOMMU: dmar init failed\n"); + put_iova_domain(&reserved_iova_list); + iommu_exit_mempool(); + return ret; + } + printk(KERN_INFO + "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); + + force_iommu = 1; + dma_ops = &intel_dma_ops; + return 0; +} + diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h new file mode 100644 index 000000000000..ee88dd2400cb --- /dev/null +++ b/drivers/pci/intel-iommu.h @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) Ashok Raj <ashok.raj@intel.com> + * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> + */ + +#ifndef _INTEL_IOMMU_H_ +#define _INTEL_IOMMU_H_ + +#include <linux/types.h> +#include <linux/msi.h> +#include "iova.h" +#include <linux/io.h> + +/* + * Intel IOMMU register specification per version 1.0 public spec. + */ + +#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ +#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ +#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ +#define DMAR_GCMD_REG 0x18 /* Global command register */ +#define DMAR_GSTS_REG 0x1c /* Global status register */ +#define DMAR_RTADDR_REG 0x20 /* Root entry table */ +#define DMAR_CCMD_REG 0x28 /* Context command reg */ +#define DMAR_FSTS_REG 0x34 /* Fault Status register */ +#define DMAR_FECTL_REG 0x38 /* Fault control register */ +#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ +#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ +#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ +#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ +#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ +#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ +#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ +#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ +#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ + +#define OFFSET_STRIDE (9) +/* +#define dmar_readl(dmar, reg) readl(dmar + reg) +#define dmar_readq(dmar, reg) ({ \ + u32 lo, hi; \ + lo = readl(dmar + reg); \ + hi = readl(dmar + reg + 4); \ + (((u64) hi) << 32) + lo; }) +*/ +static inline u64 dmar_readq(void *addr) +{ + u32 lo, hi; + lo = readl(addr); + hi = readl(addr + 4); + return (((u64) hi) << 32) + lo; +} + +static inline void dmar_writeq(void __iomem *addr, u64 val) +{ + writel((u32)val, addr); + writel((u32)(val >> 32), addr + 4); +} + +#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) +#define DMAR_VER_MINOR(v) ((v) & 0x0f) + +/* + * Decoding Capability Register + */ +#define cap_read_drain(c) (((c) >> 55) & 1) +#define cap_write_drain(c) (((c) >> 54) & 1) +#define cap_max_amask_val(c) (((c) >> 48) & 0x3f) +#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) +#define cap_pgsel_inv(c) (((c) >> 39) & 1) + +#define cap_super_page_val(c) (((c) >> 34) & 0xf) +#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \ + * OFFSET_STRIDE) + 21) + +#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) +#define cap_max_fault_reg_offset(c) \ + (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16) + +#define cap_zlr(c) (((c) >> 22) & 1) +#define cap_isoch(c) (((c) >> 23) & 1) +#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) +#define cap_sagaw(c) (((c) >> 8) & 0x1f) +#define cap_caching_mode(c) (((c) >> 7) & 1) +#define cap_phmr(c) (((c) >> 6) & 1) +#define cap_plmr(c) (((c) >> 5) & 1) +#define cap_rwbf(c) (((c) >> 4) & 1) +#define cap_afl(c) (((c) >> 3) & 1) +#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7))) +/* + * Extended Capability Register + */ + +#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1) +#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) +#define ecap_max_iotlb_offset(e) \ + (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) +#define ecap_coherent(e) ((e) & 0x1) + + +/* IOTLB_REG */ +#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) +#define DMA_TLB_DSI_FLUSH (((u64)2) << 60) +#define DMA_TLB_PSI_FLUSH (((u64)3) << 60) +#define DMA_TLB_IIRG(type) ((type >> 60) & 7) +#define DMA_TLB_IAIG(val) (((val) >> 57) & 7) +#define DMA_TLB_READ_DRAIN (((u64)1) << 49) +#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) +#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) +#define DMA_TLB_IVT (((u64)1) << 63) +#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_TLB_MAX_SIZE (0x3f) + +/* GCMD_REG */ +#define DMA_GCMD_TE (((u32)1) << 31) +#define DMA_GCMD_SRTP (((u32)1) << 30) +#define DMA_GCMD_SFL (((u32)1) << 29) +#define DMA_GCMD_EAFL (((u32)1) << 28) +#define DMA_GCMD_WBF (((u32)1) << 27) + +/* GSTS_REG */ +#define DMA_GSTS_TES (((u32)1) << 31) +#define DMA_GSTS_RTPS (((u32)1) << 30) +#define DMA_GSTS_FLS (((u32)1) << 29) +#define DMA_GSTS_AFLS (((u32)1) << 28) +#define DMA_GSTS_WBFS (((u32)1) << 27) + +/* CCMD_REG */ +#define DMA_CCMD_ICC (((u64)1) << 63) +#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) +#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61) +#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61) +#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32) +#define DMA_CCMD_MASK_NOBIT 0 +#define DMA_CCMD_MASK_1BIT 1 +#define DMA_CCMD_MASK_2BIT 2 +#define DMA_CCMD_MASK_3BIT 3 +#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) +#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) + +/* FECTL_REG */ +#define DMA_FECTL_IM (((u32)1) << 31) + +/* FSTS_REG */ +#define DMA_FSTS_PPF ((u32)2) +#define DMA_FSTS_PFO ((u32)1) +#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) + +/* FRCD_REG, 32 bits access */ +#define DMA_FRCD_F (((u32)1) << 31) +#define dma_frcd_type(d) ((d >> 30) & 1) +#define dma_frcd_fault_reason(c) (c & 0xff) +#define dma_frcd_source_id(c) (c & 0xffff) +#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ + +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 val; + u64 rsvd1; +}; +#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) +static inline bool root_present(struct root_entry *root) +{ + return (root->val & 1); +} +static inline void set_root_present(struct root_entry *root) +{ + root->val |= 1; +} +static inline void set_root_value(struct root_entry *root, unsigned long value) +{ + root->val |= value & PAGE_MASK_4K; +} + +struct context_entry; +static inline struct context_entry * +get_context_addr_from_root(struct root_entry *root) +{ + return (struct context_entry *) + (root_present(root)?phys_to_virt( + root->val & PAGE_MASK_4K): + NULL); +} + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; +#define context_present(c) ((c).lo & 1) +#define context_fault_disable(c) (((c).lo >> 1) & 1) +#define context_translation_type(c) (((c).lo >> 2) & 3) +#define context_address_root(c) ((c).lo & PAGE_MASK_4K) +#define context_address_width(c) ((c).hi & 7) +#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) + +#define context_set_present(c) do {(c).lo |= 1;} while (0) +#define context_set_fault_enable(c) \ + do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) +#define context_set_translation_type(c, val) \ + do { \ + (c).lo &= (((u64)-1) << 4) | 3; \ + (c).lo |= ((val) & 3) << 2; \ + } while (0) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define context_set_address_root(c, val) \ + do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) +#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) +#define context_set_domain_id(c, val) \ + do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) + +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-11: available + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; +#define dma_clear_pte(p) do {(p).val = 0;} while (0) + +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) + +#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) +#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) +#define dma_set_pte_prot(p, prot) \ + do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) +#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_set_pte_addr(p, addr) do {\ + (p).val |= ((addr) & PAGE_MASK_4K); } while (0) +#define dma_pte_present(p) (((p).val & 3) != 0) + +struct intel_iommu; + +struct dmar_domain { + int id; /* domain id */ + struct intel_iommu *iommu; /* back pointer to owning iommu */ + + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* virtual address */ + spinlock_t mapping_lock; /* page table lock */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 + int flags; +}; + +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u8 bus; /* PCI bus numer */ + u8 devfn; /* PCI devfn number */ + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct dmar_domain *domain; /* pointer to domain */ +}; + +extern int init_dmars(void); + +struct intel_iommu { + void __iomem *reg; /* Pointer to hardware regs, virtual addr */ + u64 cap; + u64 ecap; + unsigned long *domain_ids; /* bitmap of domains */ + struct dmar_domain **domains; /* ptr to domains */ + int seg; + u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ + spinlock_t lock; /* protect context, domain ids */ + spinlock_t register_lock; /* protect register handling */ + struct root_entry *root_entry; /* virtual address */ + + unsigned int irq; + unsigned char name[7]; /* Device Name */ + struct msi_msg saved_msg; + struct sys_device sysdev; +}; + +#ifndef CONFIG_DMAR_GFX_WA +static inline void iommu_prepare_gfx_mapping(void) +{ + return; +} +#endif /* !CONFIG_DMAR_GFX_WA */ + +#endif diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c new file mode 100644 index 000000000000..a84571c29360 --- /dev/null +++ b/drivers/pci/iova.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This file is released under the GPLv2. + * + * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> + */ + +#include "iova.h" + +void +init_iova_domain(struct iova_domain *iovad) +{ + spin_lock_init(&iovad->iova_alloc_lock); + spin_lock_init(&iovad->iova_rbtree_lock); + iovad->rbroot = RB_ROOT; + iovad->cached32_node = NULL; + +} + +static struct rb_node * +__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) +{ + if ((*limit_pfn != DMA_32BIT_PFN) || + (iovad->cached32_node == NULL)) + return rb_last(&iovad->rbroot); + else { + struct rb_node *prev_node = rb_prev(iovad->cached32_node); + struct iova *curr_iova = + container_of(iovad->cached32_node, struct iova, node); + *limit_pfn = curr_iova->pfn_lo - 1; + return prev_node; + } +} + +static void +__cached_rbnode_insert_update(struct iova_domain *iovad, + unsigned long limit_pfn, struct iova *new) +{ + if (limit_pfn != DMA_32BIT_PFN) + return; + iovad->cached32_node = &new->node; +} + +static void +__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) +{ + struct iova *cached_iova; + struct rb_node *curr; + + if (!iovad->cached32_node) + return; + curr = iovad->cached32_node; + cached_iova = container_of(curr, struct iova, node); + + if (free->pfn_lo >= cached_iova->pfn_lo) + iovad->cached32_node = rb_next(&free->node); +} + +/* Computes the padding size required, to make the + * the start address naturally aligned on its size + */ +static int +iova_get_pad_size(int size, unsigned int limit_pfn) +{ + unsigned int pad_size = 0; + unsigned int order = ilog2(size); + + if (order) + pad_size = (limit_pfn + 1) % (1 << order); + + return pad_size; +} + +static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn, struct iova *new, bool size_aligned) +{ + struct rb_node *curr = NULL; + unsigned long flags; + unsigned long saved_pfn; + unsigned int pad_size = 0; + + /* Walk the tree backwards */ + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + saved_pfn = limit_pfn; + curr = __get_cached_rbnode(iovad, &limit_pfn); + while (curr) { + struct iova *curr_iova = container_of(curr, struct iova, node); + if (limit_pfn < curr_iova->pfn_lo) + goto move_left; + else if (limit_pfn < curr_iova->pfn_hi) + goto adjust_limit_pfn; + else { + if (size_aligned) + pad_size = iova_get_pad_size(size, limit_pfn); + if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn) + break; /* found a free slot */ + } +adjust_limit_pfn: + limit_pfn = curr_iova->pfn_lo - 1; +move_left: + curr = rb_prev(curr); + } + + if (!curr) { + if (size_aligned) + pad_size = iova_get_pad_size(size, limit_pfn); + if ((IOVA_START_PFN + size + pad_size) > limit_pfn) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return -ENOMEM; + } + } + + /* pfn_lo will point to size aligned address if size_aligned is set */ + new->pfn_lo = limit_pfn - (size + pad_size) + 1; + new->pfn_hi = new->pfn_lo + size - 1; + + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return 0; +} + +static void +iova_insert_rbtree(struct rb_root *root, struct iova *iova) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + /* Figure out where to put new node */ + while (*new) { + struct iova *this = container_of(*new, struct iova, node); + parent = *new; + + if (iova->pfn_lo < this->pfn_lo) + new = &((*new)->rb_left); + else if (iova->pfn_lo > this->pfn_lo) + new = &((*new)->rb_right); + else + BUG(); /* this should not happen */ + } + /* Add new node and rebalance tree. */ + rb_link_node(&iova->node, parent, new); + rb_insert_color(&iova->node, root); +} + +/** + * alloc_iova - allocates an iova + * @iovad - iova domain in question + * @size - size of page frames to allocate + * @limit_pfn - max limit address + * @size_aligned - set if size_aligned address range is required + * This function allocates an iova in the range limit_pfn to IOVA_START_PFN + * looking from limit_pfn instead from IOVA_START_PFN. If the size_aligned + * flag is set then the allocated address iova->pfn_lo will be naturally + * aligned on roundup_power_of_two(size). + */ +struct iova * +alloc_iova(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn, + bool size_aligned) +{ + unsigned long flags; + struct iova *new_iova; + int ret; + + new_iova = alloc_iova_mem(); + if (!new_iova) + return NULL; + + /* If size aligned is set then round the size to + * to next power of two. + */ + if (size_aligned) + size = __roundup_pow_of_two(size); + + spin_lock_irqsave(&iovad->iova_alloc_lock, flags); + ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova, + size_aligned); + + if (ret) { + spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); + free_iova_mem(new_iova); + return NULL; + } + + /* Insert the new_iova into domain rbtree by holding writer lock */ + spin_lock(&iovad->iova_rbtree_lock); + iova_insert_rbtree(&iovad->rbroot, new_iova); + __cached_rbnode_insert_update(iovad, limit_pfn, new_iova); + spin_unlock(&iovad->iova_rbtree_lock); + + spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); + + return new_iova; +} + +/** + * find_iova - find's an iova for a given pfn + * @iovad - iova domain in question. + * pfn - page frame number + * This function finds and returns an iova belonging to the + * given doamin which matches the given pfn. + */ +struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) +{ + unsigned long flags; + struct rb_node *node; + + /* Take the lock so that no other thread is manipulating the rbtree */ + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + node = iovad->rbroot.rb_node; + while (node) { + struct iova *iova = container_of(node, struct iova, node); + + /* If pfn falls within iova's range, return iova */ + if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + /* We are not holding the lock while this iova + * is referenced by the caller as the same thread + * which called this function also calls __free_iova() + * and it is by desing that only one thread can possibly + * reference a particular iova and hence no conflict. + */ + return iova; + } + + if (pfn < iova->pfn_lo) + node = node->rb_left; + else if (pfn > iova->pfn_lo) + node = node->rb_right; + } + + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return NULL; +} + +/** + * __free_iova - frees the given iova + * @iovad: iova domain in question. + * @iova: iova in question. + * Frees the given iova belonging to the giving domain + */ +void +__free_iova(struct iova_domain *iovad, struct iova *iova) +{ + unsigned long flags; + + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + __cached_rbnode_delete_update(iovad, iova); + rb_erase(&iova->node, &iovad->rbroot); + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + free_iova_mem(iova); +} + +/** + * free_iova - finds and frees the iova for a given pfn + * @iovad: - iova domain in question. + * @pfn: - pfn that is allocated previously + * This functions finds an iova for a given pfn and then + * frees the iova from that domain. + */ +void +free_iova(struct iova_domain *iovad, unsigned long pfn) +{ + struct iova *iova = find_iova(iovad, pfn); + if (iova) + __free_iova(iovad, iova); + +} + +/** + * put_iova_domain - destroys the iova doamin + * @iovad: - iova domain in question. + * All the iova's in that domain are destroyed. + */ +void put_iova_domain(struct iova_domain *iovad) +{ + struct rb_node *node; + unsigned long flags; + + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + node = rb_first(&iovad->rbroot); + while (node) { + struct iova *iova = container_of(node, struct iova, node); + rb_erase(node, &iovad->rbroot); + free_iova_mem(iova); + node = rb_first(&iovad->rbroot); + } + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); +} + +static int +__is_range_overlap(struct rb_node *node, + unsigned long pfn_lo, unsigned long pfn_hi) +{ + struct iova *iova = container_of(node, struct iova, node); + + if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo)) + return 1; + return 0; +} + +static struct iova * +__insert_new_range(struct iova_domain *iovad, + unsigned long pfn_lo, unsigned long pfn_hi) +{ + struct iova *iova; + + iova = alloc_iova_mem(); + if (!iova) + return iova; + + iova->pfn_hi = pfn_hi; + iova->pfn_lo = pfn_lo; + iova_insert_rbtree(&iovad->rbroot, iova); + return iova; +} + +static void +__adjust_overlap_range(struct iova *iova, + unsigned long *pfn_lo, unsigned long *pfn_hi) +{ + if (*pfn_lo < iova->pfn_lo) + iova->pfn_lo = *pfn_lo; + if (*pfn_hi > iova->pfn_hi) + *pfn_lo = iova->pfn_hi + 1; +} + +/** + * reserve_iova - reserves an iova in the given range + * @iovad: - iova domain pointer + * @pfn_lo: - lower page frame address + * @pfn_hi:- higher pfn adderss + * This function allocates reserves the address range from pfn_lo to pfn_hi so + * that this address is not dished out as part of alloc_iova. + */ +struct iova * +reserve_iova(struct iova_domain *iovad, + unsigned long pfn_lo, unsigned long pfn_hi) +{ + struct rb_node *node; + unsigned long flags; + struct iova *iova; + unsigned int overlap = 0; + + spin_lock_irqsave(&iovad->iova_alloc_lock, flags); + spin_lock(&iovad->iova_rbtree_lock); + for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { + if (__is_range_overlap(node, pfn_lo, pfn_hi)) { + iova = container_of(node, struct iova, node); + __adjust_overlap_range(iova, &pfn_lo, &pfn_hi); + if ((pfn_lo >= iova->pfn_lo) && + (pfn_hi <= iova->pfn_hi)) + goto finish; + overlap = 1; + + } else if (overlap) + break; + } + + /* We are here either becasue this is the first reserver node + * or need to insert remaining non overlap addr range + */ + iova = __insert_new_range(iovad, pfn_lo, pfn_hi); +finish: + + spin_unlock(&iovad->iova_rbtree_lock); + spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); + return iova; +} + +/** + * copy_reserved_iova - copies the reserved between domains + * @from: - source doamin from where to copy + * @to: - destination domin where to copy + * This function copies reserved iova's from one doamin to + * other. + */ +void +copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) +{ + unsigned long flags; + struct rb_node *node; + + spin_lock_irqsave(&from->iova_alloc_lock, flags); + spin_lock(&from->iova_rbtree_lock); + for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { + struct iova *iova = container_of(node, struct iova, node); + struct iova *new_iova; + new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); + if (!new_iova) + printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", + iova->pfn_lo, iova->pfn_lo); + } + spin_unlock(&from->iova_rbtree_lock); + spin_unlock_irqrestore(&from->iova_alloc_lock, flags); +} diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h new file mode 100644 index 000000000000..ae3028d5a941 --- /dev/null +++ b/drivers/pci/iova.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This file is released under the GPLv2. + * + * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> + * + */ + +#ifndef _IOVA_H_ +#define _IOVA_H_ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/rbtree.h> +#include <linux/dma-mapping.h> + +/* + * We need a fixed PAGE_SIZE of 4K irrespective of + * arch PAGE_SIZE for IOMMU page tables. + */ +#define PAGE_SHIFT_4K (12) +#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) +#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) +#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) + +/* IO virtual address start page frame number */ +#define IOVA_START_PFN (1) + +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) +#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) +#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) + +/* iova structure */ +struct iova { + struct rb_node node; + unsigned long pfn_hi; /* IOMMU dish out addr hi */ + unsigned long pfn_lo; /* IOMMU dish out addr lo */ +}; + +/* holds all the iova translations for a domain */ +struct iova_domain { + spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */ + spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ + struct rb_root rbroot; /* iova domain rbtree root */ + struct rb_node *cached32_node; /* Save last alloced node */ +}; + +struct iova *alloc_iova_mem(void); +void free_iova_mem(struct iova *iova); +void free_iova(struct iova_domain *iovad, unsigned long pfn); +void __free_iova(struct iova_domain *iovad, struct iova *iova); +struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn, + bool size_aligned); +struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, + unsigned long pfn_hi); +void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); +void init_iova_domain(struct iova_domain *iovad); +struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); +void put_iova_domain(struct iova_domain *iovad); + +#endif diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 6fda33de84e8..fc87e14b50de 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -90,3 +90,4 @@ pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) return NULL; } +struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5db6b6690b59..463a5a9d583d 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -837,6 +837,19 @@ static void pci_release_dev(struct device *dev) kfree(pci_dev); } +static void set_pcie_port_type(struct pci_dev *pdev) +{ + int pos; + u16 reg16; + + pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (!pos) + return; + pdev->is_pcie = 1; + pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); + pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; +} + /** * pci_cfg_space_size - get the configuration space size of the PCI device. * @dev: PCI device @@ -951,6 +964,7 @@ pci_scan_device(struct pci_bus *bus, int devfn) dev->device = (l >> 16) & 0xffff; dev->cfg_size = pci_cfg_space_size(dev); dev->error_state = pci_channel_io_normal; + set_pcie_port_type(dev); /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) set this higher, assuming the system even supports it. */ diff --git a/drivers/pci/search.c b/drivers/pci/search.c index c6e79d01ce3d..b001b5922e33 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -14,6 +14,40 @@ #include "pci.h" DECLARE_RWSEM(pci_bus_sem); +/* + * find the upstream PCIE-to-PCI bridge of a PCI device + * if the device is PCIE, return NULL + * if the device isn't connected to a PCIE bridge (that is its parent is a + * legacy PCI bridge and the bridge is directly connected to bus 0), return its + * parent + */ +struct pci_dev * +pci_find_upstream_pcie_bridge(struct pci_dev *pdev) +{ + struct pci_dev *tmp = NULL; + + if (pdev->is_pcie) + return NULL; + while (1) { + if (!pdev->bus->self) + break; + pdev = pdev->bus->self; + /* a p2p bridge */ + if (!pdev->is_pcie) { + tmp = pdev; + continue; + } + /* PCI device should connect to a PCIE bridge */ + if (pdev->pcie_type != PCI_EXP_TYPE_PCI_BRIDGE) { + /* Busted hardware? */ + WARN_ON_ONCE(1); + return NULL; + } + return pdev; + } + + return tmp; +} static struct pci_bus *pci_do_find_bus(struct pci_bus *bus, unsigned char busnr) { diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index fb14014ee16e..afb262b4be15 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -1840,7 +1840,7 @@ static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, (scsi_bufflen(srb) < TW_MIN_SGL_LENGTH)) { if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_BIDIRECTIONAL) { struct scatterlist *sg = scsi_sglist(srb); - char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + char *buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; memcpy(tw_dev->generic_buffer_virt[request_id], buf, sg->length); kunmap_atomic(buf - sg->offset, KM_IRQ0); } @@ -1919,7 +1919,7 @@ static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int re char *buf; unsigned long flags = 0; local_irq_save(flags); - buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; memcpy(buf, tw_dev->generic_buffer_virt[request_id], sg->length); kunmap_atomic(buf - sg->offset, KM_IRQ0); local_irq_restore(flags); diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index a64153b96034..59716ebeb10c 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -1469,7 +1469,7 @@ static void tw_transfer_internal(TW_Device_Extension *tw_dev, int request_id, struct scatterlist *sg = scsi_sglist(cmd); local_irq_save(flags); - buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; transfer_len = min(sg->length, len); memcpy(buf, data, transfer_len); diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 988f0bc5eda5..2597209183d0 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -298,8 +298,7 @@ static __inline__ void initialize_SCp(Scsi_Cmnd * cmd) if (cmd->use_sg) { cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer; cmd->SCp.buffers_residual = cmd->use_sg - 1; - cmd->SCp.ptr = page_address(cmd->SCp.buffer->page)+ - cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); cmd->SCp.this_residual = cmd->SCp.buffer->length; } else { cmd->SCp.buffer = NULL; @@ -2143,8 +2142,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) { ++cmd->SCp.buffer; --cmd->SCp.buffers_residual; cmd->SCp.this_residual = cmd->SCp.buffer->length; - cmd->SCp.ptr = page_address(cmd->SCp.buffer->page)+ - cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); dprintk(NDEBUG_INFORMATION, ("scsi%d : %d bytes and %d buffers left\n", instance->host_no, cmd->SCp.this_residual, cmd->SCp.buffers_residual)); } /* diff --git a/drivers/scsi/NCR53C9x.c b/drivers/scsi/NCR53C9x.c index 96e8e29aa05d..5b0efc903918 100644 --- a/drivers/scsi/NCR53C9x.c +++ b/drivers/scsi/NCR53C9x.c @@ -927,7 +927,7 @@ static void esp_get_dmabufs(struct NCR_ESP *esp, Scsi_Cmnd *sp) esp->dma_mmu_get_scsi_sgl(esp, sp); else sp->SCp.ptr = - (char *) virt_to_phys((page_address(sp->SCp.buffer->page) + sp->SCp.buffer->offset)); + (char *) virt_to_phys(sg_virt(sp->SCp.buffer)); } } @@ -1748,7 +1748,7 @@ static inline void advance_sg(struct NCR_ESP *esp, Scsi_Cmnd *sp) if (esp->dma_advance_sg) esp->dma_advance_sg (sp); else - sp->SCp.ptr = (char *) virt_to_phys((page_address(sp->SCp.buffer->page) + sp->SCp.buffer->offset)); + sp->SCp.ptr = (char *) virt_to_phys(sg_virt(sp->SCp.buffer)); } diff --git a/drivers/scsi/NCR53c406a.c b/drivers/scsi/NCR53c406a.c index 3168a1794849..137d065db3da 100644 --- a/drivers/scsi/NCR53c406a.c +++ b/drivers/scsi/NCR53c406a.c @@ -875,8 +875,7 @@ static void NCR53c406a_intr(void *dev_id) outb(TRANSFER_INFO | DMA_OP, CMD_REG); #if USE_PIO scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) { - NCR53c406a_pio_write(page_address(sg->page) + sg->offset, - sg->length); + NCR53c406a_pio_write(sg_virt(sg), sg->length); } REG0; #endif /* USE_PIO */ @@ -897,8 +896,7 @@ static void NCR53c406a_intr(void *dev_id) outb(TRANSFER_INFO | DMA_OP, CMD_REG); #if USE_PIO scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) { - NCR53c406a_pio_read(page_address(sg->page) + sg->offset, - sg->length); + NCR53c406a_pio_read(sg_virt(sg), sg->length); } REG0; #endif /* USE_PIO */ diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 80e448d0f3db..a77ab8d693d4 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -356,7 +356,7 @@ static void aac_internal_transfer(struct scsi_cmnd *scsicmd, void *data, unsigne int transfer_len; struct scatterlist *sg = scsi_sglist(scsicmd); - buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; transfer_len = min(sg->length, len + offset); transfer_len -= offset; diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index a58c265dc8af..ea8c69947644 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -613,7 +613,7 @@ struct aha152x_scdata { #define SCNEXT(SCpnt) SCDATA(SCpnt)->next #define SCSEM(SCpnt) SCDATA(SCpnt)->done -#define SG_ADDRESS(buffer) ((char *) (page_address((buffer)->page)+(buffer)->offset)) +#define SG_ADDRESS(buffer) ((char *) sg_virt((buffer))) /* state handling */ static void seldi_run(struct Scsi_Host *shpnt); diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c index 961a1882cb7e..bbcc2c52d79f 100644 --- a/drivers/scsi/aha1542.c +++ b/drivers/scsi/aha1542.c @@ -49,7 +49,7 @@ #include "aha1542.h" #define SCSI_BUF_PA(address) isa_virt_to_bus(address) -#define SCSI_SG_PA(sgent) (isa_page_to_bus((sgent)->page) + (sgent)->offset) +#define SCSI_SG_PA(sgent) (isa_page_to_bus(sg_page((sgent))) + (sgent)->offset) static void BAD_DMA(void *address, unsigned int length) { @@ -66,8 +66,7 @@ static void BAD_SG_DMA(Scsi_Cmnd * SCpnt, int badseg) { printk(KERN_CRIT "sgpnt[%d:%d] page %p/0x%llx length %u\n", - badseg, nseg, - page_address(sgp->page) + sgp->offset, + badseg, nseg, sg_virt(sgp), (unsigned long long)SCSI_SG_PA(sgp), sgp->length); @@ -712,8 +711,7 @@ static int aha1542_queuecommand(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *)) printk(KERN_CRIT "Bad segment list supplied to aha1542.c (%d, %d)\n", SCpnt->use_sg, i); scsi_for_each_sg(SCpnt, sg, SCpnt->use_sg, i) { printk(KERN_CRIT "%d: %p %d\n", i, - (page_address(sg->page) + - sg->offset), sg->length); + sg_virt(sg), sg->length); }; printk(KERN_CRIT "cptr %x: ", (unsigned int) cptr); ptr = (unsigned char *) &cptr[i]; diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index f81777586b8f..f7a252885a5c 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -1343,7 +1343,7 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, \ /* 4 bytes: Areca io control code */ sg = scsi_sglist(cmd); - buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + buffer = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; if (scsi_sg_count(cmd) > 1) { retvalue = ARCMSR_MESSAGE_FAIL; goto message_out; @@ -1593,7 +1593,7 @@ static void arcmsr_handle_virtual_command(struct AdapterControlBlock *acb, strncpy(&inqdata[32], "R001", 4); /* Product Revision */ sg = scsi_sglist(cmd); - buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + buffer = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; memcpy(buffer, inqdata, sizeof(inqdata)); sg = scsi_sglist(cmd); diff --git a/drivers/scsi/atari_NCR5380.c b/drivers/scsi/atari_NCR5380.c index 52d0b87e9aa4..d1780980fb20 100644 --- a/drivers/scsi/atari_NCR5380.c +++ b/drivers/scsi/atari_NCR5380.c @@ -515,8 +515,7 @@ static inline void initialize_SCp(Scsi_Cmnd *cmd) if (cmd->use_sg) { cmd->SCp.buffer = (struct scatterlist *)cmd->request_buffer; cmd->SCp.buffers_residual = cmd->use_sg - 1; - cmd->SCp.ptr = (char *)page_address(cmd->SCp.buffer->page) + - cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); cmd->SCp.this_residual = cmd->SCp.buffer->length; /* ++roman: Try to merge some scatter-buffers if they are at * contiguous physical addresses. @@ -2054,8 +2053,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) ++cmd->SCp.buffer; --cmd->SCp.buffers_residual; cmd->SCp.this_residual = cmd->SCp.buffer->length; - cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + - cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); /* ++roman: Try to merge some scatter-buffers if * they are at contiguous physical addresses. */ diff --git a/drivers/scsi/eata_pio.c b/drivers/scsi/eata_pio.c index 96180bb47e41..982c5092be11 100644 --- a/drivers/scsi/eata_pio.c +++ b/drivers/scsi/eata_pio.c @@ -172,7 +172,7 @@ static void IncStat(struct scsi_pointer *SCp, unsigned int Increment) SCp->Status = 0; else { SCp->buffer++; - SCp->ptr = page_address(SCp->buffer->page) + SCp->buffer->offset; + SCp->ptr = sg_virt(SCp->buffer); SCp->this_residual = SCp->buffer->length; } } @@ -410,7 +410,7 @@ static int eata_pio_queue(struct scsi_cmnd *cmd, } else { cmd->SCp.buffer = cmd->request_buffer; cmd->SCp.buffers_residual = cmd->use_sg; - cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); cmd->SCp.this_residual = cmd->SCp.buffer->length; } cmd->SCp.Status = (cmd->SCp.this_residual != 0); /* TRUE as long as bytes diff --git a/drivers/scsi/fd_mcs.c b/drivers/scsi/fd_mcs.c index 668569e8856b..8335b608e571 100644 --- a/drivers/scsi/fd_mcs.c +++ b/drivers/scsi/fd_mcs.c @@ -973,7 +973,7 @@ static irqreturn_t fd_mcs_intr(int irq, void *dev_id) if (current_SC->SCp.buffers_residual) { --current_SC->SCp.buffers_residual; ++current_SC->SCp.buffer; - current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset; + current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer); current_SC->SCp.this_residual = current_SC->SCp.buffer->length; } else break; @@ -1006,7 +1006,7 @@ static irqreturn_t fd_mcs_intr(int irq, void *dev_id) if (!current_SC->SCp.this_residual && current_SC->SCp.buffers_residual) { --current_SC->SCp.buffers_residual; ++current_SC->SCp.buffer; - current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset; + current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer); current_SC->SCp.this_residual = current_SC->SCp.buffer->length; } } @@ -1109,7 +1109,7 @@ static int fd_mcs_queue(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *)) if (current_SC->use_sg) { current_SC->SCp.buffer = (struct scatterlist *) current_SC->request_buffer; - current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset; + current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer); current_SC->SCp.this_residual = current_SC->SCp.buffer->length; current_SC->SCp.buffers_residual = current_SC->use_sg - 1; } else { diff --git a/drivers/scsi/fdomain.c b/drivers/scsi/fdomain.c index 5d282e6a6ae1..2cd6b4959eb2 100644 --- a/drivers/scsi/fdomain.c +++ b/drivers/scsi/fdomain.c @@ -1321,7 +1321,7 @@ static irqreturn_t do_fdomain_16x0_intr(int irq, void *dev_id) if (current_SC->SCp.buffers_residual) { --current_SC->SCp.buffers_residual; ++current_SC->SCp.buffer; - current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset; + current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer); current_SC->SCp.this_residual = current_SC->SCp.buffer->length; } else break; @@ -1354,7 +1354,7 @@ static irqreturn_t do_fdomain_16x0_intr(int irq, void *dev_id) && current_SC->SCp.buffers_residual) { --current_SC->SCp.buffers_residual; ++current_SC->SCp.buffer; - current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset; + current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer); current_SC->SCp.this_residual = current_SC->SCp.buffer->length; } } @@ -1439,8 +1439,7 @@ static int fdomain_16x0_queue(struct scsi_cmnd *SCpnt, if (scsi_sg_count(current_SC)) { current_SC->SCp.buffer = scsi_sglist(current_SC); - current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) - + current_SC->SCp.buffer->offset; + current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer); current_SC->SCp.this_residual = current_SC->SCp.buffer->length; current_SC->SCp.buffers_residual = scsi_sg_count(current_SC) - 1; } else { diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 3ac080ee6e2f..5ab3ce762485 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -2374,18 +2374,18 @@ static void gdth_copy_internal_data(gdth_ha_str *ha, Scsi_Cmnd *scp, if (cpsum+cpnow > cpcount) cpnow = cpcount - cpsum; cpsum += cpnow; - if (!sl->page) { + if (!sg_page(sl)) { printk("GDT-HA %d: invalid sc/gt element in gdth_copy_internal_data()\n", ha->hanum); return; } local_irq_save(flags); - address = kmap_atomic(sl->page, KM_BIO_SRC_IRQ) + sl->offset; + address = kmap_atomic(sg_page(sl), KM_BIO_SRC_IRQ) + sl->offset; if (to_buffer) memcpy(buffer, address, cpnow); else memcpy(address, buffer, cpnow); - flush_dcache_page(sl->page); + flush_dcache_page(sg_page(sl)); kunmap_atomic(address, KM_BIO_SRC_IRQ); local_irq_restore(flags); if (cpsum == cpcount) diff --git a/drivers/scsi/ibmmca.c b/drivers/scsi/ibmmca.c index 714e6273a70d..db004a450732 100644 --- a/drivers/scsi/ibmmca.c +++ b/drivers/scsi/ibmmca.c @@ -1828,7 +1828,7 @@ static int ibmmca_queuecommand(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *)) BUG_ON(scsi_sg_count(cmd) > 16); scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) { - ld(shpnt)[ldn].sge[i].address = (void *) (isa_page_to_bus(sg->page) + sg->offset); + ld(shpnt)[ldn].sge[i].address = (void *) (isa_page_to_bus(sg_page(sg)) + sg->offset); ld(shpnt)[ldn].sge[i].byte_length = sg->length; } scb->enable |= IM_POINTER_TO_LIST; diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 252d1806467f..8d0244c2e7d4 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -175,18 +175,18 @@ static void idescsi_input_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsigne while (bcount) { count = min(pc->sg->length - pc->b_count, bcount); - if (PageHighMem(pc->sg->page)) { + if (PageHighMem(sg_page(pc->sg))) { unsigned long flags; local_irq_save(flags); - buf = kmap_atomic(pc->sg->page, KM_IRQ0) + + buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) + pc->sg->offset; drive->hwif->atapi_input_bytes(drive, buf + pc->b_count, count); kunmap_atomic(buf - pc->sg->offset, KM_IRQ0); local_irq_restore(flags); } else { - buf = page_address(pc->sg->page) + pc->sg->offset; + buf = sg_virt(pc->sg); drive->hwif->atapi_input_bytes(drive, buf + pc->b_count, count); } @@ -212,18 +212,18 @@ static void idescsi_output_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsign while (bcount) { count = min(pc->sg->length - pc->b_count, bcount); - if (PageHighMem(pc->sg->page)) { + if (PageHighMem(sg_page(pc->sg))) { unsigned long flags; local_irq_save(flags); - buf = kmap_atomic(pc->sg->page, KM_IRQ0) + + buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) + pc->sg->offset; drive->hwif->atapi_output_bytes(drive, buf + pc->b_count, count); kunmap_atomic(buf - pc->sg->offset, KM_IRQ0); local_irq_restore(flags); } else { - buf = page_address(pc->sg->page) + pc->sg->offset; + buf = sg_virt(pc->sg); drive->hwif->atapi_output_bytes(drive, buf + pc->b_count, count); } diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c index 74cdc1f0a78f..a3d0c6b14958 100644 --- a/drivers/scsi/imm.c +++ b/drivers/scsi/imm.c @@ -705,9 +705,7 @@ static int imm_completion(struct scsi_cmnd *cmd) cmd->SCp.buffer++; cmd->SCp.this_residual = cmd->SCp.buffer->length; - cmd->SCp.ptr = - page_address(cmd->SCp.buffer->page) + - cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); /* * Make sure that we transfer even number of bytes @@ -844,9 +842,7 @@ static int imm_engine(imm_struct *dev, struct scsi_cmnd *cmd) cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer; cmd->SCp.this_residual = cmd->SCp.buffer->length; - cmd->SCp.ptr = - page_address(cmd->SCp.buffer->page) + - cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); } else { /* else fill the only available buffer */ cmd->SCp.buffer = NULL; diff --git a/drivers/scsi/in2000.c b/drivers/scsi/in2000.c index ab7cbf3449ce..c8b452f2878c 100644 --- a/drivers/scsi/in2000.c +++ b/drivers/scsi/in2000.c @@ -372,7 +372,7 @@ static int in2000_queuecommand(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *)) if (cmd->use_sg) { cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer; cmd->SCp.buffers_residual = cmd->use_sg - 1; - cmd->SCp.ptr = (char *) page_address(cmd->SCp.buffer->page) + cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); cmd->SCp.this_residual = cmd->SCp.buffer->length; } else { cmd->SCp.buffer = NULL; @@ -764,7 +764,7 @@ static void transfer_bytes(Scsi_Cmnd * cmd, int data_in_dir) ++cmd->SCp.buffer; --cmd->SCp.buffers_residual; cmd->SCp.this_residual = cmd->SCp.buffer->length; - cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); } /* Set up hardware registers */ diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index c316a0bcae6c..439b97a6a269 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -2872,6 +2872,7 @@ static struct ipr_sglist *ipr_alloc_ucode_buffer(int buf_len) } scatterlist = sglist->scatterlist; + sg_init_table(scatterlist, num_elem); sglist->order = order; sglist->num_sg = num_elem; @@ -2884,12 +2885,12 @@ static struct ipr_sglist *ipr_alloc_ucode_buffer(int buf_len) /* Free up what we already allocated */ for (j = i - 1; j >= 0; j--) - __free_pages(scatterlist[j].page, order); + __free_pages(sg_page(&scatterlist[j]), order); kfree(sglist); return NULL; } - scatterlist[i].page = page; + sg_set_page(&scatterlist[i], page); } return sglist; @@ -2910,7 +2911,7 @@ static void ipr_free_ucode_buffer(struct ipr_sglist *sglist) int i; for (i = 0; i < sglist->num_sg; i++) - __free_pages(sglist->scatterlist[i].page, sglist->order); + __free_pages(sg_page(&sglist->scatterlist[i]), sglist->order); kfree(sglist); } @@ -2940,9 +2941,11 @@ static int ipr_copy_ucode_buffer(struct ipr_sglist *sglist, scatterlist = sglist->scatterlist; for (i = 0; i < (len / bsize_elem); i++, buffer += bsize_elem) { - kaddr = kmap(scatterlist[i].page); + struct page *page = sg_page(&scatterlist[i]); + + kaddr = kmap(page); memcpy(kaddr, buffer, bsize_elem); - kunmap(scatterlist[i].page); + kunmap(page); scatterlist[i].length = bsize_elem; @@ -2953,9 +2956,11 @@ static int ipr_copy_ucode_buffer(struct ipr_sglist *sglist, } if (len % bsize_elem) { - kaddr = kmap(scatterlist[i].page); + struct page *page = sg_page(&scatterlist[i]); + + kaddr = kmap(page); memcpy(kaddr, buffer, len % bsize_elem); - kunmap(scatterlist[i].page); + kunmap(page); scatterlist[i].length = len % bsize_elem; } diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index edaac2714c5a..5c5a9b2628fc 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -1515,7 +1515,7 @@ static int ips_is_passthru(struct scsi_cmnd *SC) /* kmap_atomic() ensures addressability of the user buffer.*/ /* local_irq_save() protects the KM_IRQ0 address slot. */ local_irq_save(flags); - buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + buffer = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; if (buffer && buffer[0] == 'C' && buffer[1] == 'O' && buffer[2] == 'P' && buffer[3] == 'P') { kunmap_atomic(buffer - sg->offset, KM_IRQ0); @@ -3523,7 +3523,7 @@ ips_scmd_buf_write(struct scsi_cmnd *scmd, void *data, unsigned int count) /* kmap_atomic() ensures addressability of the data buffer.*/ /* local_irq_save() protects the KM_IRQ0 address slot. */ local_irq_save(flags); - buffer = kmap_atomic(sg[i].page, KM_IRQ0) + sg[i].offset; + buffer = kmap_atomic(sg_page(&sg[i]), KM_IRQ0) + sg[i].offset; memcpy(buffer, &cdata[xfer_cnt], min_cnt); kunmap_atomic(buffer - sg[i].offset, KM_IRQ0); local_irq_restore(flags); @@ -3556,7 +3556,7 @@ ips_scmd_buf_read(struct scsi_cmnd *scmd, void *data, unsigned int count) /* kmap_atomic() ensures addressability of the data buffer.*/ /* local_irq_save() protects the KM_IRQ0 address slot. */ local_irq_save(flags); - buffer = kmap_atomic(sg[i].page, KM_IRQ0) + sg[i].offset; + buffer = kmap_atomic(sg_page(&sg[i]), KM_IRQ0) + sg[i].offset; memcpy(&cdata[xfer_cnt], buffer, min_cnt); kunmap_atomic(buffer - sg[i].offset, KM_IRQ0); local_irq_restore(flags); diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index a21455d0274c..6ce4109efdf3 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -70,9 +70,7 @@ module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); static inline void iscsi_buf_init_iov(struct iscsi_buf *ibuf, char *vbuf, int size) { - ibuf->sg.page = virt_to_page(vbuf); - ibuf->sg.offset = offset_in_page(vbuf); - ibuf->sg.length = size; + sg_init_one(&ibuf->sg, vbuf, size); ibuf->sent = 0; ibuf->use_sendmsg = 1; } @@ -80,13 +78,14 @@ iscsi_buf_init_iov(struct iscsi_buf *ibuf, char *vbuf, int size) static inline void iscsi_buf_init_sg(struct iscsi_buf *ibuf, struct scatterlist *sg) { - ibuf->sg.page = sg->page; + sg_init_table(&ibuf->sg, 1); + sg_set_page(&ibuf->sg, sg_page(sg)); ibuf->sg.offset = sg->offset; ibuf->sg.length = sg->length; /* * Fastpath: sg element fits into single page */ - if (sg->length + sg->offset <= PAGE_SIZE && !PageSlab(sg->page)) + if (sg->length + sg->offset <= PAGE_SIZE && !PageSlab(sg_page(sg))) ibuf->use_sendmsg = 0; else ibuf->use_sendmsg = 1; @@ -716,7 +715,7 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn) for (i = tcp_ctask->sg_count; i < scsi_sg_count(sc); i++) { char *dest; - dest = kmap_atomic(sg[i].page, KM_SOFTIRQ0); + dest = kmap_atomic(sg_page(&sg[i]), KM_SOFTIRQ0); rc = iscsi_ctask_copy(tcp_conn, ctask, dest + sg[i].offset, sg[i].length, offset); kunmap_atomic(dest, KM_SOFTIRQ0); @@ -1103,9 +1102,9 @@ iscsi_send(struct iscsi_conn *conn, struct iscsi_buf *buf, int size, int flags) * slab case. */ if (buf->use_sendmsg) - res = sock_no_sendpage(sk, buf->sg.page, offset, size, flags); + res = sock_no_sendpage(sk, sg_page(&buf->sg), offset, size, flags); else - res = tcp_conn->sendpage(sk, buf->sg.page, offset, size, flags); + res = tcp_conn->sendpage(sk, sg_page(&buf->sg), offset, size, flags); if (res >= 0) { conn->txdata_octets += res; diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 10d1aff9938a..66c652035730 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -658,7 +658,7 @@ mega_build_cmd(adapter_t *adapter, Scsi_Cmnd *cmd, int *busy) struct scatterlist *sg; sg = scsi_sglist(cmd); - buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; memset(buf, 0, cmd->cmnd[4]); kunmap_atomic(buf - sg->offset, KM_IRQ0); @@ -1542,10 +1542,8 @@ mega_cmd_done(adapter_t *adapter, u8 completed[], int nstatus, int status) if( cmd->cmnd[0] == INQUIRY && !islogical ) { sgl = scsi_sglist(cmd); - if( sgl->page ) { - c = *(unsigned char *) - page_address((&sgl[0])->page) + - (&sgl[0])->offset; + if( sg_page(sgl) ) { + c = *(unsigned char *) sg_virt(&sgl[0]); } else { printk(KERN_WARNING "megaraid: invalid sg.\n"); diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index 78779209ac89..c8923108183a 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -1584,10 +1584,8 @@ megaraid_mbox_build_cmd(adapter_t *adapter, struct scsi_cmnd *scp, int *busy) caddr_t vaddr; sgl = scsi_sglist(scp); - if (sgl->page) { - vaddr = (caddr_t) - (page_address((&sgl[0])->page) - + (&sgl[0])->offset); + if (sg_page(sgl)) { + vaddr = (caddr_t) sg_virt(&sgl[0]); memset(vaddr, 0, scp->cmnd[4]); } @@ -2328,10 +2326,8 @@ megaraid_mbox_dpc(unsigned long devp) && IS_RAID_CH(raid_dev, scb->dev_channel)) { sgl = scsi_sglist(scp); - if (sgl->page) { - c = *(unsigned char *) - (page_address((&sgl[0])->page) + - (&sgl[0])->offset); + if (sg_page(sgl)) { + c = *(unsigned char *) sg_virt(&sgl[0]); } else { con_log(CL_ANN, (KERN_WARNING "megaraid mailbox: invalid sg:%d\n", diff --git a/drivers/scsi/oktagon_esp.c b/drivers/scsi/oktagon_esp.c index 26a6d55faf3e..8e5eadbd5c51 100644 --- a/drivers/scsi/oktagon_esp.c +++ b/drivers/scsi/oktagon_esp.c @@ -550,8 +550,7 @@ void dma_mmu_get_scsi_one(struct NCR_ESP *esp, Scsi_Cmnd *sp) void dma_mmu_get_scsi_sgl(struct NCR_ESP *esp, Scsi_Cmnd *sp) { - sp->SCp.ptr = page_address(sp->SCp.buffer->page)+ - sp->SCp.buffer->offset; + sp->SCp.ptr = sg_virt(sp->SCp.buffer); } void dma_mmu_release_scsi_one(struct NCR_ESP *esp, Scsi_Cmnd *sp) @@ -564,8 +563,7 @@ void dma_mmu_release_scsi_sgl(struct NCR_ESP *esp, Scsi_Cmnd *sp) void dma_advance_sg(Scsi_Cmnd *sp) { - sp->SCp.ptr = page_address(sp->SCp.buffer->page)+ - sp->SCp.buffer->offset; + sp->SCp.ptr = sg_virt(sp->SCp.buffer); } diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index 331b789937c4..1c5c4b68f20f 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -542,7 +542,7 @@ static int osst_verify_frame(struct osst_tape * STp, int frame_seq_number, int q if (STp->raw) { if (STp->buffer->syscall_result) { for (i=0; i < STp->buffer->sg_segs; i++) - memset(page_address(STp->buffer->sg[i].page), + memset(page_address(sg_page(&STp->buffer->sg[i])), 0, STp->buffer->sg[i].length); strcpy(STp->buffer->b_data, "READ ERROR ON FRAME"); } else @@ -4437,7 +4437,7 @@ static int os_scsi_tape_open(struct inode * inode, struct file * filp) for (i = 0, b_size = 0; (i < STp->buffer->sg_segs) && ((b_size + STp->buffer->sg[i].length) <= OS_DATA_SIZE); b_size += STp->buffer->sg[i++].length); - STp->buffer->aux = (os_aux_t *) (page_address(STp->buffer->sg[i].page) + OS_DATA_SIZE - b_size); + STp->buffer->aux = (os_aux_t *) (page_address(sg_page(&STp->buffer->sg[i])) + OS_DATA_SIZE - b_size); #if DEBUG printk(OSST_DEB_MSG "%s:D: b_data points to %p in segment 0 at %p\n", name, STp->buffer->b_data, page_address(STp->buffer->sg[0].page)); @@ -5252,25 +5252,26 @@ static int enlarge_buffer(struct osst_buffer *STbuffer, int need_dma) /* Try to allocate the first segment up to OS_DATA_SIZE and the others big enough to reach the goal (code assumes no segments in place) */ for (b_size = OS_DATA_SIZE, order = OSST_FIRST_ORDER; b_size >= PAGE_SIZE; order--, b_size /= 2) { - STbuffer->sg[0].page = alloc_pages(priority, order); + struct page *page = alloc_pages(priority, order); + STbuffer->sg[0].offset = 0; - if (STbuffer->sg[0].page != NULL) { + if (page != NULL) { + sg_set_page(&STbuffer->sg[0], page); STbuffer->sg[0].length = b_size; - STbuffer->b_data = page_address(STbuffer->sg[0].page); + STbuffer->b_data = page_address(page); break; } } - if (STbuffer->sg[0].page == NULL) { + if (sg_page(&STbuffer->sg[0]) == NULL) { printk(KERN_NOTICE "osst :I: Can't allocate tape buffer main segment.\n"); return 0; } /* Got initial segment of 'bsize,order', continue with same size if possible, except for AUX */ for (segs=STbuffer->sg_segs=1, got=b_size; segs < max_segs && got < OS_FRAME_SIZE; ) { - STbuffer->sg[segs].page = - alloc_pages(priority, (OS_FRAME_SIZE - got <= PAGE_SIZE) ? 0 : order); + struct page *page = alloc_pages(priority, (OS_FRAME_SIZE - got <= PAGE_SIZE) ? 0 : order); STbuffer->sg[segs].offset = 0; - if (STbuffer->sg[segs].page == NULL) { + if (page == NULL) { if (OS_FRAME_SIZE - got <= (max_segs - segs) * b_size / 2 && order) { b_size /= 2; /* Large enough for the rest of the buffers */ order--; @@ -5284,6 +5285,7 @@ static int enlarge_buffer(struct osst_buffer *STbuffer, int need_dma) normalize_buffer(STbuffer); return 0; } + sg_set_page(&STbuffer->sg[segs], page); STbuffer->sg[segs].length = (OS_FRAME_SIZE - got <= PAGE_SIZE / 2) ? (OS_FRAME_SIZE - got) : b_size; got += STbuffer->sg[segs].length; STbuffer->buffer_size = got; @@ -5316,7 +5318,7 @@ static void normalize_buffer(struct osst_buffer *STbuffer) b_size < STbuffer->sg[i].length; b_size *= 2, order++); - __free_pages(STbuffer->sg[i].page, order); + __free_pages(sg_page(&STbuffer->sg[i]), order); STbuffer->buffer_size -= STbuffer->sg[i].length; } #if DEBUG @@ -5344,7 +5346,7 @@ static int append_to_buffer(const char __user *ubp, struct osst_buffer *st_bp, i for ( ; i < st_bp->sg_segs && do_count > 0; i++) { cnt = st_bp->sg[i].length - offset < do_count ? st_bp->sg[i].length - offset : do_count; - res = copy_from_user(page_address(st_bp->sg[i].page) + offset, ubp, cnt); + res = copy_from_user(page_address(sg_page(&st_bp->sg[i])) + offset, ubp, cnt); if (res) return (-EFAULT); do_count -= cnt; @@ -5377,7 +5379,7 @@ static int from_buffer(struct osst_buffer *st_bp, char __user *ubp, int do_count for ( ; i < st_bp->sg_segs && do_count > 0; i++) { cnt = st_bp->sg[i].length - offset < do_count ? st_bp->sg[i].length - offset : do_count; - res = copy_to_user(ubp, page_address(st_bp->sg[i].page) + offset, cnt); + res = copy_to_user(ubp, page_address(sg_page(&st_bp->sg[i])) + offset, cnt); if (res) return (-EFAULT); do_count -= cnt; @@ -5410,7 +5412,7 @@ static int osst_zero_buffer_tail(struct osst_buffer *st_bp) i < st_bp->sg_segs && do_count > 0; i++) { cnt = st_bp->sg[i].length - offset < do_count ? st_bp->sg[i].length - offset : do_count ; - memset(page_address(st_bp->sg[i].page) + offset, 0, cnt); + memset(page_address(sg_page(&st_bp->sg[i])) + offset, 0, cnt); do_count -= cnt; offset = 0; } @@ -5430,7 +5432,7 @@ static int osst_copy_to_buffer(struct osst_buffer *st_bp, unsigned char *ptr) for (i = 0; i < st_bp->sg_segs && do_count > 0; i++) { cnt = st_bp->sg[i].length < do_count ? st_bp->sg[i].length : do_count ; - memcpy(page_address(st_bp->sg[i].page), ptr, cnt); + memcpy(page_address(sg_page(&st_bp->sg[i])), ptr, cnt); do_count -= cnt; ptr += cnt; } @@ -5451,7 +5453,7 @@ static int osst_copy_from_buffer(struct osst_buffer *st_bp, unsigned char *ptr) for (i = 0; i < st_bp->sg_segs && do_count > 0; i++) { cnt = st_bp->sg[i].length < do_count ? st_bp->sg[i].length : do_count ; - memcpy(ptr, page_address(st_bp->sg[i].page), cnt); + memcpy(ptr, page_address(sg_page(&st_bp->sg[i])), cnt); do_count -= cnt; ptr += cnt; } diff --git a/drivers/scsi/pcmcia/nsp_cs.h b/drivers/scsi/pcmcia/nsp_cs.h index 98397559c53b..7db28cd49446 100644 --- a/drivers/scsi/pcmcia/nsp_cs.h +++ b/drivers/scsi/pcmcia/nsp_cs.h @@ -393,7 +393,7 @@ enum _burst_mode { #define MSG_EXT_SDTR 0x01 /* scatter-gather table */ -# define BUFFER_ADDR ((char *)((unsigned int)(SCpnt->SCp.buffer->page) + SCpnt->SCp.buffer->offset)) +# define BUFFER_ADDR ((char *)((sg_virt(SCpnt->SCp.buffer)))) #endif /*__nsp_cs__*/ /* end */ diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c index 190e2a7d7067..969b9387a0c3 100644 --- a/drivers/scsi/pcmcia/sym53c500_cs.c +++ b/drivers/scsi/pcmcia/sym53c500_cs.c @@ -443,8 +443,7 @@ SYM53C500_intr(int irq, void *dev_id) scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) { SYM53C500_pio_write(fast_pio, port_base, - page_address(sg->page) + sg->offset, - sg->length); + sg_virt(sg), sg->length); } REG0(port_base); } @@ -463,8 +462,7 @@ SYM53C500_intr(int irq, void *dev_id) scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) { SYM53C500_pio_read(fast_pio, port_base, - page_address(sg->page) + sg->offset, - sg->length); + sg_virt(sg), sg->length); } REG0(port_base); } diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c index 67b6d76a6c8d..67ee51a3d7e1 100644 --- a/drivers/scsi/ppa.c +++ b/drivers/scsi/ppa.c @@ -608,9 +608,7 @@ static int ppa_completion(struct scsi_cmnd *cmd) cmd->SCp.buffer++; cmd->SCp.this_residual = cmd->SCp.buffer->length; - cmd->SCp.ptr = - page_address(cmd->SCp.buffer->page) + - cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); } } /* Now check to see if the drive is ready to comunicate */ @@ -756,8 +754,7 @@ static int ppa_engine(ppa_struct *dev, struct scsi_cmnd *cmd) /* if many buffers are available, start filling the first */ cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer; cmd->SCp.this_residual = cmd->SCp.buffer->length; - cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + - cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); } else { /* else fill the only available buffer */ cmd->SCp.buffer = NULL; diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c index 0f43d1d046d9..03f19b8d19c9 100644 --- a/drivers/scsi/ps3rom.c +++ b/drivers/scsi/ps3rom.c @@ -111,14 +111,14 @@ static int fill_from_dev_buffer(struct scsi_cmnd *cmd, const void *buf) req_len = act_len = 0; scsi_for_each_sg(cmd, sgpnt, scsi_sg_count(cmd), k) { if (active) { - kaddr = kmap_atomic(sgpnt->page, KM_IRQ0); + kaddr = kmap_atomic(sg_page(sgpnt), KM_IRQ0); len = sgpnt->length; if ((req_len + len) > buflen) { active = 0; len = buflen - req_len; } memcpy(kaddr + sgpnt->offset, buf + req_len, len); - flush_kernel_dcache_page(sgpnt->page); + flush_kernel_dcache_page(sg_page(sgpnt)); kunmap_atomic(kaddr, KM_IRQ0); act_len += len; } @@ -147,7 +147,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd *cmd, void *buf) req_len = fin = 0; scsi_for_each_sg(cmd, sgpnt, scsi_sg_count(cmd), k) { - kaddr = kmap_atomic(sgpnt->page, KM_IRQ0); + kaddr = kmap_atomic(sg_page(sgpnt->page), KM_IRQ0); len = sgpnt->length; if ((req_len + len) > buflen) { len = buflen - req_len; diff --git a/drivers/scsi/qlogicfas408.c b/drivers/scsi/qlogicfas408.c index 2bfbf26c00ed..de7b3bc2cbc9 100644 --- a/drivers/scsi/qlogicfas408.c +++ b/drivers/scsi/qlogicfas408.c @@ -317,7 +317,7 @@ static unsigned int ql_pcmd(struct scsi_cmnd *cmd) return ((priv->qabort == 1 ? DID_ABORT : DID_RESET) << 16); } - buf = page_address(sg->page) + sg->offset; + buf = sg_virt(sg); if (ql_pdma(priv, phase, buf, sg->length)) break; } diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 72ee4c9cfb1a..46cae5a212de 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -625,7 +625,7 @@ static int fill_from_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr, scsi_for_each_sg(scp, sg, scp->use_sg, k) { if (active) { kaddr = (unsigned char *) - kmap_atomic(sg->page, KM_USER0); + kmap_atomic(sg_page(sg), KM_USER0); if (NULL == kaddr) return (DID_ERROR << 16); kaddr_off = (unsigned char *)kaddr + sg->offset; @@ -672,7 +672,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr, sg = scsi_sglist(scp); req_len = fin = 0; for (k = 0; k < scp->use_sg; ++k, sg = sg_next(sg)) { - kaddr = (unsigned char *)kmap_atomic(sg->page, KM_USER0); + kaddr = (unsigned char *)kmap_atomic(sg_page(sg), KM_USER0); if (NULL == kaddr) return -1; kaddr_off = (unsigned char *)kaddr + sg->offset; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index aac8a02cbe80..61fdaf02f251 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -295,7 +295,7 @@ static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl, int i, err, nr_vecs = 0; for_each_sg(sgl, sg, nsegs, i) { - page = sg->page; + page = sg_page(sg); off = sg->offset; len = sg->length; data_len += len; @@ -764,7 +764,7 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask) if (unlikely(!sgl)) goto enomem; - memset(sgl, 0, sizeof(*sgl) * sgp->size); + sg_init_table(sgl, sgp->size); /* * first loop through, set initial index and return value @@ -781,6 +781,13 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask) sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl); /* + * if we have nothing left, mark the last segment as + * end-of-list + */ + if (!left) + sg_mark_end(sgl, this); + + /* * don't allow subsequent mempool allocs to sleep, it would * violate the mempool principle. */ @@ -2353,7 +2360,7 @@ void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count, *offset = *offset - len_complete + sg->offset; /* Assumption: contiguous pages can be accessed as "page + i" */ - page = nth_page(sg->page, (*offset >> PAGE_SHIFT)); + page = nth_page(sg_page(sg), (*offset >> PAGE_SHIFT)); *offset &= ~PAGE_MASK; /* Bytes in this sg-entry from *offset to the end of the page */ diff --git a/drivers/scsi/seagate.c b/drivers/scsi/seagate.c index ce80fa9ad815..b11324479b5b 100644 --- a/drivers/scsi/seagate.c +++ b/drivers/scsi/seagate.c @@ -999,14 +999,14 @@ connect_loop: for (i = 0; i < nobuffs; ++i) printk("scsi%d : buffer %d address = %p length = %d\n", hostno, i, - page_address(buffer[i].page) + buffer[i].offset, + sg_virt(&buffer[i]), buffer[i].length); } #endif buffer = (struct scatterlist *) SCint->request_buffer; len = buffer->length; - data = page_address(buffer->page) + buffer->offset; + data = sg_virt(buffer); } else { DPRINTK (DEBUG_SG, "scsi%d : scatter gather not requested.\n", hostno); buffer = NULL; @@ -1239,7 +1239,7 @@ connect_loop: --nobuffs; ++buffer; len = buffer->length; - data = page_address(buffer->page) + buffer->offset; + data = sg_virt(buffer); DPRINTK (DEBUG_SG, "scsi%d : next scatter-gather buffer len = %d address = %08x\n", hostno, len, data); @@ -1396,7 +1396,7 @@ connect_loop: --nobuffs; ++buffer; len = buffer->length; - data = page_address(buffer->page) + buffer->offset; + data = sg_virt(buffer); DPRINTK (DEBUG_SG, "scsi%d : next scatter-gather buffer len = %d address = %08x\n", hostno, len, data); } break; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 7238b2dfc497..cc1971002846 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1169,7 +1169,7 @@ sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type) len = vma->vm_end - sa; len = (len < sg->length) ? len : sg->length; if (offset < len) { - page = virt_to_page(page_address(sg->page) + offset); + page = virt_to_page(page_address(sg_page(sg)) + offset); get_page(page); /* increment page count */ break; } @@ -1717,13 +1717,13 @@ st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages, goto out_unlock; */ } - sgl[0].page = pages[0]; + sg_set_page(sgl, pages[0]); sgl[0].offset = uaddr & ~PAGE_MASK; if (nr_pages > 1) { sgl[0].length = PAGE_SIZE - sgl[0].offset; count -= sgl[0].length; for (i=1; i < nr_pages ; i++) { - sgl[i].page = pages[i]; + sg_set_page(&sgl[i], pages[i]); sgl[i].length = count < PAGE_SIZE ? count : PAGE_SIZE; count -= PAGE_SIZE; } @@ -1754,7 +1754,7 @@ st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages, int i; for (i=0; i < nr_pages; i++) { - struct page *page = sgl[i].page; + struct page *page = sg_page(&sgl[i]); if (dirtied) SetPageDirty(page); @@ -1854,7 +1854,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) scatter_elem_sz_prev = ret_sz; } } - sg->page = p; + sg_set_page(sg, p); sg->length = (ret_sz > num) ? num : ret_sz; SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, " @@ -1907,14 +1907,14 @@ sg_write_xfer(Sg_request * srp) onum = 1; ksglen = sg->length; - p = page_address(sg->page); + p = page_address(sg_page(sg)); for (j = 0, k = 0; j < onum; ++j) { res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up); if (res) return res; for (; p; sg = sg_next(sg), ksglen = sg->length, - p = page_address(sg->page)) { + p = page_address(sg_page(sg))) { if (usglen <= 0) break; if (ksglen > usglen) { @@ -1991,12 +1991,12 @@ sg_remove_scat(Sg_scatter_hold * schp) } else { int k; - for (k = 0; (k < schp->k_use_sg) && sg->page; + for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) { SCSI_LOG_TIMEOUT(5, printk( "sg_remove_scat: k=%d, pg=0x%p, len=%d\n", - k, sg->page, sg->length)); - sg_page_free(sg->page, sg->length); + k, sg_page(sg), sg->length)); + sg_page_free(sg_page(sg), sg->length); } } kfree(schp->buffer); @@ -2038,7 +2038,7 @@ sg_read_xfer(Sg_request * srp) } else onum = 1; - p = page_address(sg->page); + p = page_address(sg_page(sg)); ksglen = sg->length; for (j = 0, k = 0; j < onum; ++j) { res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up); @@ -2046,7 +2046,7 @@ sg_read_xfer(Sg_request * srp) return res; for (; p; sg = sg_next(sg), ksglen = sg->length, - p = page_address(sg->page)) { + p = page_address(sg_page(sg))) { if (usglen <= 0) break; if (ksglen > usglen) { @@ -2092,15 +2092,15 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) if ((!outp) || (num_read_xfer <= 0)) return 0; - for (k = 0; (k < schp->k_use_sg) && sg->page; ++k, sg = sg_next(sg)) { + for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) { num = sg->length; if (num > num_read_xfer) { - if (__copy_to_user(outp, page_address(sg->page), + if (__copy_to_user(outp, page_address(sg_page(sg)), num_read_xfer)) return -EFAULT; break; } else { - if (__copy_to_user(outp, page_address(sg->page), + if (__copy_to_user(outp, page_address(sg_page(sg)), num)) return -EFAULT; num_read_xfer -= num; diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 73c44cbdea47..ce69b9efc102 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -3797,7 +3797,7 @@ static void buf_to_sg(struct st_buffer *STbp, unsigned int length) sg = &(STbp->sg[0]); frp = STbp->frp; for (i=count=0; count < length; i++) { - sg[i].page = frp[i].page; + sg_set_page(&sg[i], frp[i].page); if (length - count > frp[i].length) sg[i].length = frp[i].length; else @@ -4446,14 +4446,14 @@ static int sgl_map_user_pages(struct scatterlist *sgl, const unsigned int max_pa } /* Populate the scatter/gather list */ - sgl[0].page = pages[0]; + sg_set_page(&sgl[0], pages[0]); sgl[0].offset = uaddr & ~PAGE_MASK; if (nr_pages > 1) { sgl[0].length = PAGE_SIZE - sgl[0].offset; count -= sgl[0].length; for (i=1; i < nr_pages ; i++) { + sg_set_page(&sgl[i], pages[i]);; sgl[i].offset = 0; - sgl[i].page = pages[i]; sgl[i].length = count < PAGE_SIZE ? count : PAGE_SIZE; count -= PAGE_SIZE; } @@ -4483,7 +4483,7 @@ static int sgl_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_p int i; for (i=0; i < nr_pages; i++) { - struct page *page = sgl[i].page; + struct page *page = sg_page(&sgl[i]); if (dirtied) SetPageDirty(page); diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c index 4aafe89b557f..2dcde373b20e 100644 --- a/drivers/scsi/sun3_NCR5380.c +++ b/drivers/scsi/sun3_NCR5380.c @@ -272,8 +272,7 @@ static struct scsi_host_template *the_template = NULL; #define HOSTNO instance->host_no #define H_NO(cmd) (cmd)->device->host->host_no -#define SGADDR(buffer) (void *)(((unsigned long)page_address((buffer)->page)) + \ - (buffer)->offset) +#define SGADDR(buffer) (void *)(((unsigned long)sg_virt(((buffer))))) #ifdef SUPPORT_TAGS diff --git a/drivers/scsi/sym53c416.c b/drivers/scsi/sym53c416.c index 8befab7e9839..90cee94d9522 100644 --- a/drivers/scsi/sym53c416.c +++ b/drivers/scsi/sym53c416.c @@ -196,7 +196,7 @@ static unsigned int sym53c416_base_3[2] = {0,0}; #define MAXHOSTS 4 -#define SG_ADDRESS(buffer) ((char *) (page_address((buffer)->page)+(buffer)->offset)) +#define SG_ADDRESS(buffer) ((char *) sg_virt((buffer))) enum phases { diff --git a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c index 5c72ca31a47a..44193049c4ae 100644 --- a/drivers/scsi/tmscsim.c +++ b/drivers/scsi/tmscsim.c @@ -430,10 +430,7 @@ static __inline__ void dc390_Going_remove (struct dc390_dcb* pDCB, struct dc390_ static struct scatterlist* dc390_sg_build_single(struct scatterlist *sg, void *addr, unsigned int length) { - memset(sg, 0, sizeof(struct scatterlist)); - sg->page = virt_to_page(addr); - sg->length = length; - sg->offset = (unsigned long)addr & ~PAGE_MASK; + sg_init_one(sg, addr, length); return sg; } diff --git a/drivers/scsi/ultrastor.c b/drivers/scsi/ultrastor.c index ea72bbeb8f9d..6d1f0edd7985 100644 --- a/drivers/scsi/ultrastor.c +++ b/drivers/scsi/ultrastor.c @@ -681,7 +681,7 @@ static inline void build_sg_list(struct mscp *mscp, struct scsi_cmnd *SCpnt) max = scsi_sg_count(SCpnt); scsi_for_each_sg(SCpnt, sg, max, i) { - mscp->sglist[i].address = isa_page_to_bus(sg->page) + sg->offset; + mscp->sglist[i].address = isa_page_to_bus(sg_page(sg)) + sg->offset; mscp->sglist[i].num_bytes = sg->length; transfer_length += sg->length; } diff --git a/drivers/scsi/wd33c93.c b/drivers/scsi/wd33c93.c index 0e8e642fd3b0..fdbb92d1f722 100644 --- a/drivers/scsi/wd33c93.c +++ b/drivers/scsi/wd33c93.c @@ -410,8 +410,7 @@ wd33c93_queuecommand(struct scsi_cmnd *cmd, if (cmd->use_sg) { cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer; cmd->SCp.buffers_residual = cmd->use_sg - 1; - cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + - cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); cmd->SCp.this_residual = cmd->SCp.buffer->length; } else { cmd->SCp.buffer = NULL; @@ -745,8 +744,7 @@ transfer_bytes(const wd33c93_regs regs, struct scsi_cmnd *cmd, ++cmd->SCp.buffer; --cmd->SCp.buffers_residual; cmd->SCp.this_residual = cmd->SCp.buffer->length; - cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + - cmd->SCp.buffer->offset; + cmd->SCp.ptr = sg_virt(cmd->SCp.buffer); } if (!cmd->SCp.this_residual) /* avoid bogus setups */ return; diff --git a/drivers/scsi/wd7000.c b/drivers/scsi/wd7000.c index 255c611e78b8..03cd44f231df 100644 --- a/drivers/scsi/wd7000.c +++ b/drivers/scsi/wd7000.c @@ -1123,7 +1123,7 @@ static int wd7000_queuecommand(struct scsi_cmnd *SCpnt, any2scsi(scb->maxlen, nseg * sizeof(Sgb)); scsi_for_each_sg(SCpnt, sg, nseg, i) { - any2scsi(sgb[i].ptr, isa_page_to_bus(sg->page) + sg->offset); + any2scsi(sgb[i].ptr, isa_page_to_bus(sg_page(sg)) + sg->offset); any2scsi(sgb[i].len, sg->length); } } else { diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 8dd5a6afd513..90d64a808464 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -437,13 +437,11 @@ int usb_sg_init ( #if defined(CONFIG_HIGHMEM) || defined(CONFIG_IOMMU) io->urbs[i]->transfer_buffer = NULL; #else - io->urbs[i]->transfer_buffer = - page_address(sg[i].page) + sg[i].offset; + io->urbs[i]->transfer_buffer = sg_virt(&sg[i]); #endif } else { /* hc may use _only_ transfer_buffer */ - io->urbs [i]->transfer_buffer = - page_address (sg [i].page) + sg [i].offset; + io->urbs [i]->transfer_buffer = sg_virt(&sg[i]); len = sg [i].length; } diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c index e7d982a71548..91e999c9f680 100644 --- a/drivers/usb/image/microtek.c +++ b/drivers/usb/image/microtek.c @@ -519,8 +519,7 @@ static void mts_do_sg (struct urb* transfer) context->fragment++; mts_int_submit_urb(transfer, context->data_pipe, - page_address(sg[context->fragment].page) + - sg[context->fragment].offset, + sg_virt(&sg[context->fragment]), sg[context->fragment].length, context->fragment + 1 == scsi_sg_count(context->srb) ? mts_data_done : mts_do_sg); @@ -557,7 +556,7 @@ mts_build_transfer_context(struct scsi_cmnd *srb, struct mts_desc* desc) return; } else { sg = scsi_sglist(srb); - desc->context.data = page_address(sg[0].page) + sg[0].offset; + desc->context.data = sg_virt(&sg[0]); desc->context.data_length = sg[0].length; } diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index e901d31e051b..ea3162146481 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -360,9 +360,9 @@ static void free_sglist (struct scatterlist *sg, int nents) if (!sg) return; for (i = 0; i < nents; i++) { - if (!sg [i].page) + if (!sg_page(&sg[i])) continue; - kfree (page_address (sg [i].page) + sg [i].offset); + kfree (sg_virt(&sg[i])); } kfree (sg); } diff --git a/drivers/usb/storage/protocol.c b/drivers/usb/storage/protocol.c index cc8f7c52c729..889622baac20 100644 --- a/drivers/usb/storage/protocol.c +++ b/drivers/usb/storage/protocol.c @@ -195,7 +195,7 @@ unsigned int usb_stor_access_xfer_buf(unsigned char *buffer, * the *offset and *index values for the next loop. */ cnt = 0; while (cnt < buflen) { - struct page *page = sg->page + + struct page *page = sg_page(sg) + ((sg->offset + *offset) >> PAGE_SHIFT); unsigned int poff = (sg->offset + *offset) & (PAGE_SIZE-1); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 0a3ee5a322b0..5574ba3ab1f9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -103,7 +103,7 @@ extern int cifs_ioctl(struct inode *inode, struct file *filep, unsigned int command, unsigned long arg); #ifdef CONFIG_CIFS_EXPERIMENTAL -extern struct export_operations cifs_export_ops; +extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ #define CIFS_VERSION "1.51" diff --git a/fs/cifs/export.c b/fs/cifs/export.c index d614b91caeca..75949d6a5f1b 100644 --- a/fs/cifs/export.c +++ b/fs/cifs/export.c @@ -53,7 +53,7 @@ static struct dentry *cifs_get_parent(struct dentry *dentry) return ERR_PTR(-EACCES); } -struct export_operations cifs_export_ops = { +const struct export_operations cifs_export_ops = { .get_parent = cifs_get_parent, /* Following five export operations are unneeded so far and can default: .get_dentry = diff --git a/fs/dcache.c b/fs/dcache.c index 2bb3f7ac683b..d9ca1e5ceb92 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1479,6 +1479,8 @@ static void switch_names(struct dentry *dentry, struct dentry *target) * dentry:internal, target:external. Steal target's * storage and make target internal. */ + memcpy(target->d_iname, dentry->d_name.name, + dentry->d_name.len + 1); dentry->d_name.name = target->d_name.name; target->d_name.name = target->d_iname; } diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 1ae90ef2c74d..0a9882edf562 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -283,7 +283,7 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, pg = virt_to_page(addr); offset = offset_in_page(addr); if (sg) { - sg[i].page = pg; + sg_set_page(&sg[i], pg); sg[i].offset = offset; } remainder_of_page = PAGE_CACHE_SIZE - offset; @@ -713,10 +713,13 @@ ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, { struct scatterlist src_sg, dst_sg; - src_sg.page = src_page; + sg_init_table(&src_sg, 1); + sg_init_table(&dst_sg, 1); + + sg_set_page(&src_sg, src_page); src_sg.offset = src_offset; src_sg.length = size; - dst_sg.page = dst_page; + sg_set_page(&dst_sg, dst_page); dst_sg.offset = dst_offset; dst_sg.length = size; return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); @@ -742,10 +745,13 @@ ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, { struct scatterlist src_sg, dst_sg; - src_sg.page = src_page; + sg_init_table(&src_sg, 1); + sg_init_table(&dst_sg, 1); + + sg_set_page(&src_sg, src_page); src_sg.offset = src_offset; src_sg.length = size; - dst_sg.page = dst_page; + sg_set_page(&dst_sg, dst_page); dst_sg.offset = dst_offset; dst_sg.length = size; return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 89d9710dd63d..263fed88c0ca 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1040,6 +1040,9 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, }; int rc = 0; + sg_init_table(&dst_sg, 1); + sg_init_table(&src_sg, 1); + if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk( KERN_DEBUG, "Session key encryption key (size [%d]):\n", diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 5276b19423c1..f7f407075be1 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -10,6 +10,8 @@ #include <linux/string.h> #include <linux/efs_fs.h> #include <linux/smp_lock.h> +#include <linux/exportfs.h> + static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) { struct buffer_head *bh; @@ -75,13 +77,10 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei return NULL; } -struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp) +static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, + u32 generation) { - __u32 *objp = vobjp; - unsigned long ino = objp[0]; - __u32 generation = objp[1]; struct inode *inode; - struct dentry *result; if (ino == 0) return ERR_PTR(-ESTALE); @@ -91,20 +90,25 @@ struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp) if (is_bad_inode(inode) || (generation && inode->i_generation != generation)) { - result = ERR_PTR(-ESTALE); - goto out_iput; + iput(inode); + return ERR_PTR(-ESTALE); } - result = d_alloc_anon(inode); - if (!result) { - result = ERR_PTR(-ENOMEM); - goto out_iput; - } - return result; + return inode; +} - out_iput: - iput(inode); - return result; +struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + efs_nfs_get_inode); +} + +struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + efs_nfs_get_inode); } struct dentry *efs_get_parent(struct dentry *child) diff --git a/fs/efs/super.c b/fs/efs/super.c index 25d0326c5f1c..c79bc627f107 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -113,8 +113,9 @@ static const struct super_operations efs_superblock_operations = { .remount_fs = efs_remount, }; -static struct export_operations efs_export_ops = { - .get_dentry = efs_get_dentry, +static const struct export_operations efs_export_ops = { + .fh_to_dentry = efs_fh_to_dentry, + .fh_to_parent = efs_fh_to_parent, .get_parent = efs_get_parent, }; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 8adb32a9387a..109ab5e44eca 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -1,4 +1,13 @@ - +/* + * Copyright (C) Neil Brown 2002 + * Copyright (C) Christoph Hellwig 2007 + * + * This file contains the code mapping from inodes to NFS file handles, + * and for mapping back from file handles to dentries. + * + * For details on why we do all the strange and hairy things in here + * take a look at Documentation/filesystems/Exporting. + */ #include <linux/exportfs.h> #include <linux/fs.h> #include <linux/file.h> @@ -9,32 +18,19 @@ #define dprintk(fmt, args...) do{}while(0) -static int get_name(struct dentry *dentry, char *name, +static int get_name(struct vfsmount *mnt, struct dentry *dentry, char *name, struct dentry *child); -static struct dentry *exportfs_get_dentry(struct super_block *sb, void *obj) +static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir, + char *name, struct dentry *child) { - struct dentry *result = ERR_PTR(-ESTALE); - - if (sb->s_export_op->get_dentry) { - result = sb->s_export_op->get_dentry(sb, obj); - if (!result) - result = ERR_PTR(-ESTALE); - } - - return result; -} - -static int exportfs_get_name(struct dentry *dir, char *name, - struct dentry *child) -{ - struct export_operations *nop = dir->d_sb->s_export_op; + const struct export_operations *nop = dir->d_sb->s_export_op; if (nop->get_name) return nop->get_name(dir, name, child); else - return get_name(dir, name, child); + return get_name(mnt, dir, name, child); } /* @@ -98,7 +94,7 @@ find_disconnected_root(struct dentry *dentry) * It may already be, as the flag isn't always updated when connection happens. */ static int -reconnect_path(struct super_block *sb, struct dentry *target_dir) +reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) { char nbuf[NAME_MAX+1]; int noprogress = 0; @@ -121,7 +117,7 @@ reconnect_path(struct super_block *sb, struct dentry *target_dir) pd->d_flags &= ~DCACHE_DISCONNECTED; spin_unlock(&pd->d_lock); noprogress = 0; - } else if (pd == sb->s_root) { + } else if (pd == mnt->mnt_sb->s_root) { printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n"); spin_lock(&pd->d_lock); pd->d_flags &= ~DCACHE_DISCONNECTED; @@ -147,8 +143,8 @@ reconnect_path(struct super_block *sb, struct dentry *target_dir) struct dentry *npd; mutex_lock(&pd->d_inode->i_mutex); - if (sb->s_export_op->get_parent) - ppd = sb->s_export_op->get_parent(pd); + if (mnt->mnt_sb->s_export_op->get_parent) + ppd = mnt->mnt_sb->s_export_op->get_parent(pd); mutex_unlock(&pd->d_inode->i_mutex); if (IS_ERR(ppd)) { @@ -161,7 +157,7 @@ reconnect_path(struct super_block *sb, struct dentry *target_dir) dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, pd->d_inode->i_ino, ppd->d_inode->i_ino); - err = exportfs_get_name(ppd, nbuf, pd); + err = exportfs_get_name(mnt, ppd, nbuf, pd); if (err) { dput(ppd); dput(pd); @@ -214,125 +210,6 @@ reconnect_path(struct super_block *sb, struct dentry *target_dir) return 0; } -/** - * find_exported_dentry - helper routine to implement export_operations->decode_fh - * @sb: The &super_block identifying the filesystem - * @obj: An opaque identifier of the object to be found - passed to - * get_inode - * @parent: An optional opqaue identifier of the parent of the object. - * @acceptable: A function used to test possible &dentries to see if they are - * acceptable - * @context: A parameter to @acceptable so that it knows on what basis to - * judge. - * - * find_exported_dentry is the central helper routine to enable file systems - * to provide the decode_fh() export_operation. It's main task is to take - * an &inode, find or create an appropriate &dentry structure, and possibly - * splice this into the dcache in the correct place. - * - * The decode_fh() operation provided by the filesystem should call - * find_exported_dentry() with the same parameters that it received except - * that instead of the file handle fragment, pointers to opaque identifiers - * for the object and optionally its parent are passed. The default decode_fh - * routine passes one pointer to the start of the filehandle fragment, and - * one 8 bytes into the fragment. It is expected that most filesystems will - * take this approach, though the offset to the parent identifier may well be - * different. - * - * find_exported_dentry() will call get_dentry to get an dentry pointer from - * the file system. If any &dentry in the d_alias list is acceptable, it will - * be returned. Otherwise find_exported_dentry() will attempt to splice a new - * &dentry into the dcache using get_name() and get_parent() to find the - * appropriate place. - */ - -struct dentry * -find_exported_dentry(struct super_block *sb, void *obj, void *parent, - int (*acceptable)(void *context, struct dentry *de), - void *context) -{ - struct dentry *result, *alias; - int err = -ESTALE; - - /* - * Attempt to find the inode. - */ - result = exportfs_get_dentry(sb, obj); - if (IS_ERR(result)) - return result; - - if (S_ISDIR(result->d_inode->i_mode)) { - if (!(result->d_flags & DCACHE_DISCONNECTED)) { - if (acceptable(context, result)) - return result; - err = -EACCES; - goto err_result; - } - - err = reconnect_path(sb, result); - if (err) - goto err_result; - } else { - struct dentry *target_dir, *nresult; - char nbuf[NAME_MAX+1]; - - alias = find_acceptable_alias(result, acceptable, context); - if (alias) - return alias; - - if (parent == NULL) - goto err_result; - - target_dir = exportfs_get_dentry(sb,parent); - if (IS_ERR(target_dir)) { - err = PTR_ERR(target_dir); - goto err_result; - } - - err = reconnect_path(sb, target_dir); - if (err) { - dput(target_dir); - goto err_result; - } - - /* - * As we weren't after a directory, have one more step to go. - */ - err = exportfs_get_name(target_dir, nbuf, result); - if (!err) { - mutex_lock(&target_dir->d_inode->i_mutex); - nresult = lookup_one_len(nbuf, target_dir, - strlen(nbuf)); - mutex_unlock(&target_dir->d_inode->i_mutex); - if (!IS_ERR(nresult)) { - if (nresult->d_inode) { - dput(result); - result = nresult; - } else - dput(nresult); - } - } - dput(target_dir); - } - - alias = find_acceptable_alias(result, acceptable, context); - if (alias) - return alias; - - /* drat - I just cannot find anything acceptable */ - dput(result); - /* It might be justifiable to return ESTALE here, - * but the filehandle at-least looks reasonable good - * and it may just be a permission problem, so returning - * -EACCESS is safer - */ - return ERR_PTR(-EACCES); - - err_result: - dput(result); - return ERR_PTR(err); -} - struct getdents_callback { char *name; /* name that was found. It already points to a buffer NAME_MAX+1 is size */ @@ -370,8 +247,8 @@ static int filldir_one(void * __buf, const char * name, int len, * calls readdir on the parent until it finds an entry with * the same inode number as the child, and returns that. */ -static int get_name(struct dentry *dentry, char *name, - struct dentry *child) +static int get_name(struct vfsmount *mnt, struct dentry *dentry, + char *name, struct dentry *child) { struct inode *dir = dentry->d_inode; int error; @@ -387,7 +264,7 @@ static int get_name(struct dentry *dentry, char *name, /* * Open the directory ... */ - file = dentry_open(dget(dentry), NULL, O_RDONLY); + file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY); error = PTR_ERR(file); if (IS_ERR(file)) goto out; @@ -434,100 +311,177 @@ out: * can be used to check that it is still valid. It places them in the * filehandle fragment where export_decode_fh expects to find them. */ -static int export_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, - int connectable) +static int export_encode_fh(struct dentry *dentry, struct fid *fid, + int *max_len, int connectable) { struct inode * inode = dentry->d_inode; int len = *max_len; - int type = 1; + int type = FILEID_INO32_GEN; if (len < 2 || (connectable && len < 4)) return 255; len = 2; - fh[0] = inode->i_ino; - fh[1] = inode->i_generation; + fid->i32.ino = inode->i_ino; + fid->i32.gen = inode->i_generation; if (connectable && !S_ISDIR(inode->i_mode)) { struct inode *parent; spin_lock(&dentry->d_lock); parent = dentry->d_parent->d_inode; - fh[2] = parent->i_ino; - fh[3] = parent->i_generation; + fid->i32.parent_ino = parent->i_ino; + fid->i32.parent_gen = parent->i_generation; spin_unlock(&dentry->d_lock); len = 4; - type = 2; + type = FILEID_INO32_GEN_PARENT; } *max_len = len; return type; } - -/** - * export_decode_fh - default export_operations->decode_fh function - * @sb: The superblock - * @fh: pointer to the file handle fragment - * @fh_len: length of file handle fragment - * @acceptable: function for testing acceptability of dentrys - * @context: context for @acceptable - * - * This is the default decode_fh() function. - * a fileid_type of 1 indicates that the filehandlefragment - * just contains an object identifier understood by get_dentry. - * a fileid_type of 2 says that there is also a directory - * identifier 8 bytes in to the filehandlefragement. - */ -static struct dentry *export_decode_fh(struct super_block *sb, __u32 *fh, int fh_len, - int fileid_type, - int (*acceptable)(void *context, struct dentry *de), - void *context) -{ - __u32 parent[2]; - parent[0] = parent[1] = 0; - if (fh_len < 2 || fileid_type > 2) - return NULL; - if (fileid_type == 2) { - if (fh_len > 2) parent[0] = fh[2]; - if (fh_len > 3) parent[1] = fh[3]; - } - return find_exported_dentry(sb, fh, parent, - acceptable, context); -} - -int exportfs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, +int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int connectable) { - struct export_operations *nop = dentry->d_sb->s_export_op; + const struct export_operations *nop = dentry->d_sb->s_export_op; int error; if (nop->encode_fh) - error = nop->encode_fh(dentry, fh, max_len, connectable); + error = nop->encode_fh(dentry, fid->raw, max_len, connectable); else - error = export_encode_fh(dentry, fh, max_len, connectable); + error = export_encode_fh(dentry, fid, max_len, connectable); return error; } EXPORT_SYMBOL_GPL(exportfs_encode_fh); -struct dentry *exportfs_decode_fh(struct vfsmount *mnt, __u32 *fh, int fh_len, - int fileid_type, int (*acceptable)(void *, struct dentry *), - void *context) +struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, + int fh_len, int fileid_type, + int (*acceptable)(void *, struct dentry *), void *context) { - struct export_operations *nop = mnt->mnt_sb->s_export_op; - struct dentry *result; + const struct export_operations *nop = mnt->mnt_sb->s_export_op; + struct dentry *result, *alias; + int err; - if (nop->decode_fh) { - result = nop->decode_fh(mnt->mnt_sb, fh, fh_len, fileid_type, - acceptable, context); + /* + * Try to get any dentry for the given file handle from the filesystem. + */ + result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); + if (!result) + result = ERR_PTR(-ESTALE); + if (IS_ERR(result)) + return result; + + if (S_ISDIR(result->d_inode->i_mode)) { + /* + * This request is for a directory. + * + * On the positive side there is only one dentry for each + * directory inode. On the negative side this implies that we + * to ensure our dentry is connected all the way up to the + * filesystem root. + */ + if (result->d_flags & DCACHE_DISCONNECTED) { + err = reconnect_path(mnt, result); + if (err) + goto err_result; + } + + if (!acceptable(context, result)) { + err = -EACCES; + goto err_result; + } + + return result; } else { - result = export_decode_fh(mnt->mnt_sb, fh, fh_len, fileid_type, - acceptable, context); + /* + * It's not a directory. Life is a little more complicated. + */ + struct dentry *target_dir, *nresult; + char nbuf[NAME_MAX+1]; + + /* + * See if either the dentry we just got from the filesystem + * or any alias for it is acceptable. This is always true + * if this filesystem is exported without the subtreecheck + * option. If the filesystem is exported with the subtree + * check option there's a fair chance we need to look at + * the parent directory in the file handle and make sure + * it's connected to the filesystem root. + */ + alias = find_acceptable_alias(result, acceptable, context); + if (alias) + return alias; + + /* + * Try to extract a dentry for the parent directory from the + * file handle. If this fails we'll have to give up. + */ + err = -ESTALE; + if (!nop->fh_to_parent) + goto err_result; + + target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, + fh_len, fileid_type); + if (!target_dir) + goto err_result; + err = PTR_ERR(target_dir); + if (IS_ERR(target_dir)) + goto err_result; + + /* + * And as usual we need to make sure the parent directory is + * connected to the filesystem root. The VFS really doesn't + * like disconnected directories.. + */ + err = reconnect_path(mnt, target_dir); + if (err) { + dput(target_dir); + goto err_result; + } + + /* + * Now that we've got both a well-connected parent and a + * dentry for the inode we're after, make sure that our + * inode is actually connected to the parent. + */ + err = exportfs_get_name(mnt, target_dir, nbuf, result); + if (!err) { + mutex_lock(&target_dir->d_inode->i_mutex); + nresult = lookup_one_len(nbuf, target_dir, + strlen(nbuf)); + mutex_unlock(&target_dir->d_inode->i_mutex); + if (!IS_ERR(nresult)) { + if (nresult->d_inode) { + dput(result); + result = nresult; + } else + dput(nresult); + } + } + + /* + * At this point we are done with the parent, but it's pinned + * by the child dentry anyway. + */ + dput(target_dir); + + /* + * And finally make sure the dentry is actually acceptable + * to NFSD. + */ + alias = find_acceptable_alias(result, acceptable, context); + if (!alias) { + err = -EACCES; + goto err_result; + } + + return alias; } - return result; + err_result: + dput(result); + return ERR_PTR(err); } EXPORT_SYMBOL_GPL(exportfs_decode_fh); -EXPORT_SYMBOL(find_exported_dentry); - MODULE_LICENSE("GPL"); diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 05d9342bb64e..d868e26c15eb 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -28,6 +28,24 @@ typedef struct ext2_dir_entry_2 ext2_dirent; +static inline unsigned ext2_rec_len_from_disk(__le16 dlen) +{ + unsigned len = le16_to_cpu(dlen); + + if (len == EXT2_MAX_REC_LEN) + return 1 << 16; + return len; +} + +static inline __le16 ext2_rec_len_to_disk(unsigned len) +{ + if (len == (1 << 16)) + return cpu_to_le16(EXT2_MAX_REC_LEN); + else if (len > (1 << 16)) + BUG(); + return cpu_to_le16(len); +} + /* * ext2 uses block-sized chunks. Arguably, sector-sized ones would be * more robust, but we have what we have @@ -106,7 +124,7 @@ static void ext2_check_page(struct page *page) } for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) { p = (ext2_dirent *)(kaddr + offs); - rec_len = le16_to_cpu(p->rec_len); + rec_len = ext2_rec_len_from_disk(p->rec_len); if (rec_len < EXT2_DIR_REC_LEN(1)) goto Eshort; @@ -204,7 +222,8 @@ static inline int ext2_match (int len, const char * const name, */ static inline ext2_dirent *ext2_next_entry(ext2_dirent *p) { - return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len)); + return (ext2_dirent *)((char *)p + + ext2_rec_len_from_disk(p->rec_len)); } static inline unsigned @@ -316,7 +335,7 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) return 0; } } - filp->f_pos += le16_to_cpu(de->rec_len); + filp->f_pos += ext2_rec_len_from_disk(de->rec_len); } ext2_put_page(page); } @@ -425,7 +444,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, { loff_t pos = page_offset(page) + (char *) de - (char *) page_address(page); - unsigned len = le16_to_cpu(de->rec_len); + unsigned len = ext2_rec_len_from_disk(de->rec_len); int err; lock_page(page); @@ -482,7 +501,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) /* We hit i_size */ name_len = 0; rec_len = chunk_size; - de->rec_len = cpu_to_le16(chunk_size); + de->rec_len = ext2_rec_len_to_disk(chunk_size); de->inode = 0; goto got_it; } @@ -496,7 +515,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) if (ext2_match (namelen, name, de)) goto out_unlock; name_len = EXT2_DIR_REC_LEN(de->name_len); - rec_len = le16_to_cpu(de->rec_len); + rec_len = ext2_rec_len_from_disk(de->rec_len); if (!de->inode && rec_len >= reclen) goto got_it; if (rec_len >= name_len + reclen) @@ -518,8 +537,8 @@ got_it: goto out_unlock; if (de->inode) { ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len); - de1->rec_len = cpu_to_le16(rec_len - name_len); - de->rec_len = cpu_to_le16(name_len); + de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len); + de->rec_len = ext2_rec_len_to_disk(name_len); de = de1; } de->name_len = namelen; @@ -550,7 +569,8 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) struct inode *inode = mapping->host; char *kaddr = page_address(page); unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); - unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); + unsigned to = ((char *)dir - kaddr) + + ext2_rec_len_from_disk(dir->rec_len); loff_t pos; ext2_dirent * pde = NULL; ext2_dirent * de = (ext2_dirent *) (kaddr + from); @@ -574,7 +594,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) &page, NULL); BUG_ON(err); if (pde) - pde->rec_len = cpu_to_le16(to - from); + pde->rec_len = ext2_rec_len_to_disk(to - from); dir->inode = 0; err = ext2_commit_chunk(page, pos, to - from); inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; @@ -610,14 +630,14 @@ int ext2_make_empty(struct inode *inode, struct inode *parent) memset(kaddr, 0, chunk_size); de = (struct ext2_dir_entry_2 *)kaddr; de->name_len = 1; - de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1)); + de->rec_len = ext2_rec_len_to_disk(EXT2_DIR_REC_LEN(1)); memcpy (de->name, ".\0\0", 4); de->inode = cpu_to_le32(inode->i_ino); ext2_set_de_type (de, inode); de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1)); de->name_len = 2; - de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1)); + de->rec_len = ext2_rec_len_to_disk(chunk_size - EXT2_DIR_REC_LEN(1)); de->inode = cpu_to_le32(parent->i_ino); memcpy (de->name, "..\0", 4); ext2_set_de_type (de, inode); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 77bd5f9262f9..154e25f13d77 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -311,13 +311,10 @@ static const struct super_operations ext2_sops = { #endif }; -static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp) +static struct inode *ext2_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) { - __u32 *objp = vobjp; - unsigned long ino = objp[0]; - __u32 generation = objp[1]; struct inode *inode; - struct dentry *result; if (ino < EXT2_FIRST_INO(sb) && ino != EXT2_ROOT_INO) return ERR_PTR(-ESTALE); @@ -338,15 +335,21 @@ static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp) iput(inode); return ERR_PTR(-ESTALE); } - /* now to find a dentry. - * If possible, get a well-connected one - */ - result = d_alloc_anon(inode); - if (!result) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - return result; + return inode; +} + +static struct dentry *ext2_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + ext2_nfs_get_inode); +} + +static struct dentry *ext2_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + ext2_nfs_get_inode); } /* Yes, most of these are left as NULL!! @@ -354,9 +357,10 @@ static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp) * systems, but can be improved upon. * Currently only get_parent is required. */ -static struct export_operations ext2_export_ops = { +static const struct export_operations ext2_export_ops = { + .fh_to_dentry = ext2_fh_to_dentry, + .fh_to_parent = ext2_fh_to_parent, .get_parent = ext2_get_parent, - .get_dentry = ext2_get_dentry, }; static unsigned long get_sb_block(void **data) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 81868c0bc40e..de55da9e28ba 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -631,13 +631,10 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) } -static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) +static struct inode *ext3_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) { - __u32 *objp = vobjp; - unsigned long ino = objp[0]; - __u32 generation = objp[1]; struct inode *inode; - struct dentry *result; if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) return ERR_PTR(-ESTALE); @@ -660,15 +657,22 @@ static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) iput(inode); return ERR_PTR(-ESTALE); } - /* now to find a dentry. - * If possible, get a well-connected one - */ - result = d_alloc_anon(inode); - if (!result) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - return result; + + return inode; +} + +static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + ext3_nfs_get_inode); +} + +static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + ext3_nfs_get_inode); } #ifdef CONFIG_QUOTA @@ -737,9 +741,10 @@ static const struct super_operations ext3_sops = { #endif }; -static struct export_operations ext3_export_ops = { +static const struct export_operations ext3_export_ops = { + .fh_to_dentry = ext3_fh_to_dentry, + .fh_to_parent = ext3_fh_to_parent, .get_parent = ext3_get_parent, - .get_dentry = ext3_get_dentry, }; enum { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b11e9e2bcd01..8031dc0e24e5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -686,13 +686,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) } -static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp) +static struct inode *ext4_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) { - __u32 *objp = vobjp; - unsigned long ino = objp[0]; - __u32 generation = objp[1]; struct inode *inode; - struct dentry *result; if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) return ERR_PTR(-ESTALE); @@ -715,15 +712,22 @@ static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp) iput(inode); return ERR_PTR(-ESTALE); } - /* now to find a dentry. - * If possible, get a well-connected one - */ - result = d_alloc_anon(inode); - if (!result) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - return result; + + return inode; +} + +static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + ext4_nfs_get_inode); +} + +static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + ext4_nfs_get_inode); } #ifdef CONFIG_QUOTA @@ -792,9 +796,10 @@ static const struct super_operations ext4_sops = { #endif }; -static struct export_operations ext4_export_ops = { +static const struct export_operations ext4_export_ops = { + .fh_to_dentry = ext4_fh_to_dentry, + .fh_to_parent = ext4_fh_to_parent, .get_parent = ext4_get_parent, - .get_dentry = ext4_get_dentry, }; enum { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index c0c5e9c55b58..920a576e1c25 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -653,24 +653,15 @@ static const struct super_operations fat_sops = { * of i_logstart is used to store the directory entry offset. */ -static struct dentry * -fat_decode_fh(struct super_block *sb, __u32 *fh, int len, int fhtype, - int (*acceptable)(void *context, struct dentry *de), - void *context) -{ - if (fhtype != 3) - return ERR_PTR(-ESTALE); - if (len < 5) - return ERR_PTR(-ESTALE); - - return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, context); -} - -static struct dentry *fat_get_dentry(struct super_block *sb, void *inump) +static struct dentry *fat_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) { struct inode *inode = NULL; struct dentry *result; - __u32 *fh = inump; + u32 *fh = fid->raw; + + if (fh_len < 5 || fh_type != 3) + return NULL; inode = iget(sb, fh[0]); if (!inode || is_bad_inode(inode) || inode->i_generation != fh[1]) { @@ -783,10 +774,9 @@ out: return parent; } -static struct export_operations fat_export_ops = { - .decode_fh = fat_decode_fh, +static const struct export_operations fat_export_ops = { .encode_fh = fat_encode_fh, - .get_dentry = fat_get_dentry, + .fh_to_dentry = fat_fh_to_dentry, .get_parent = fat_get_parent, }; diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index e2d1347796a9..b9da62348a87 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -31,40 +31,6 @@ #define GFS2_LARGE_FH_SIZE 8 #define GFS2_OLD_FH_SIZE 10 -static struct dentry *gfs2_decode_fh(struct super_block *sb, - __u32 *p, - int fh_len, - int fh_type, - int (*acceptable)(void *context, - struct dentry *dentry), - void *context) -{ - __be32 *fh = (__force __be32 *)p; - struct gfs2_inum_host inum, parent; - - memset(&parent, 0, sizeof(struct gfs2_inum)); - - switch (fh_len) { - case GFS2_LARGE_FH_SIZE: - case GFS2_OLD_FH_SIZE: - parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; - parent.no_formal_ino |= be32_to_cpu(fh[5]); - parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; - parent.no_addr |= be32_to_cpu(fh[7]); - case GFS2_SMALL_FH_SIZE: - inum.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; - inum.no_formal_ino |= be32_to_cpu(fh[1]); - inum.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; - inum.no_addr |= be32_to_cpu(fh[3]); - break; - default: - return NULL; - } - - return gfs2_export_ops.find_exported_dentry(sb, &inum, &parent, - acceptable, context); -} - static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, int connectable) { @@ -189,10 +155,10 @@ static struct dentry *gfs2_get_parent(struct dentry *child) return dentry; } -static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) +static struct dentry *gfs2_get_dentry(struct super_block *sb, + struct gfs2_inum_host *inum) { struct gfs2_sbd *sdp = sb->s_fs_info; - struct gfs2_inum_host *inum = inum_obj; struct gfs2_holder i_gh, ri_gh, rgd_gh; struct gfs2_rgrpd *rgd; struct inode *inode; @@ -289,11 +255,50 @@ fail: return ERR_PTR(error); } -struct export_operations gfs2_export_ops = { - .decode_fh = gfs2_decode_fh, +static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + struct gfs2_inum_host this; + __be32 *fh = (__force __be32 *)fid->raw; + + switch (fh_type) { + case GFS2_SMALL_FH_SIZE: + case GFS2_LARGE_FH_SIZE: + case GFS2_OLD_FH_SIZE: + this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; + this.no_formal_ino |= be32_to_cpu(fh[1]); + this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; + this.no_addr |= be32_to_cpu(fh[3]); + return gfs2_get_dentry(sb, &this); + default: + return NULL; + } +} + +static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + struct gfs2_inum_host parent; + __be32 *fh = (__force __be32 *)fid->raw; + + switch (fh_type) { + case GFS2_LARGE_FH_SIZE: + case GFS2_OLD_FH_SIZE: + parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; + parent.no_formal_ino |= be32_to_cpu(fh[5]); + parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; + parent.no_addr |= be32_to_cpu(fh[7]); + return gfs2_get_dentry(sb, &parent); + default: + return NULL; + } +} + +const struct export_operations gfs2_export_ops = { .encode_fh = gfs2_encode_fh, + .fh_to_dentry = gfs2_fh_to_dentry, + .fh_to_parent = gfs2_fh_to_parent, .get_name = gfs2_get_name, .get_parent = gfs2_get_parent, - .get_dentry = gfs2_get_dentry, }; diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h index 407029b3b2b3..da8490511836 100644 --- a/fs/gfs2/ops_fstype.h +++ b/fs/gfs2/ops_fstype.h @@ -14,6 +14,6 @@ extern struct file_system_type gfs2_fs_type; extern struct file_system_type gfs2meta_fs_type; -extern struct export_operations gfs2_export_ops; +extern const struct export_operations gfs2_export_ops; #endif /* __OPS_FSTYPE_DOT_H__ */ diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 4af856a7fda7..29f9753ae5e5 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -42,16 +42,6 @@ isofs_export_iget(struct super_block *sb, return result; } -static struct dentry * -isofs_export_get_dentry(struct super_block *sb, void *vobjp) -{ - __u32 *objp = vobjp; - unsigned long block = objp[0]; - unsigned long offset = objp[1]; - __u32 generation = objp[2]; - return isofs_export_iget(sb, block, offset, generation); -} - /* This function is surprisingly simple. The trick is understanding * that "child" is always a directory. So, to find its parent, you * simply need to find its ".." entry, normalize its block and offset, @@ -182,43 +172,44 @@ isofs_export_encode_fh(struct dentry *dentry, return type; } +struct isofs_fid { + u32 block; + u16 offset; + u16 parent_offset; + u32 generation; + u32 parent_block; + u32 parent_generation; +}; -static struct dentry * -isofs_export_decode_fh(struct super_block *sb, - __u32 *fh32, - int fh_len, - int fileid_type, - int (*acceptable)(void *context, struct dentry *de), - void *context) +static struct dentry *isofs_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) { - __u16 *fh16 = (__u16*)fh32; - __u32 child[3]; /* The child is what triggered all this. */ - __u32 parent[3]; /* The parent is just along for the ride. */ + struct isofs_fid *ifid = (struct isofs_fid *)fid; - if (fh_len < 3 || fileid_type > 2) + if (fh_len < 3 || fh_type > 2) return NULL; - child[0] = fh32[0]; - child[1] = fh16[2]; /* fh16 [sic] */ - child[2] = fh32[2]; - - parent[0] = 0; - parent[1] = 0; - parent[2] = 0; - if (fileid_type == 2) { - if (fh_len > 2) parent[0] = fh32[3]; - parent[1] = fh16[3]; /* fh16 [sic] */ - if (fh_len > 4) parent[2] = fh32[4]; - } - - return sb->s_export_op->find_exported_dentry(sb, child, parent, - acceptable, context); + return isofs_export_iget(sb, ifid->block, ifid->offset, + ifid->generation); } +static struct dentry *isofs_fh_to_parent(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + struct isofs_fid *ifid = (struct isofs_fid *)fid; + + if (fh_type != 2) + return NULL; + + return isofs_export_iget(sb, + fh_len > 2 ? ifid->parent_block : 0, + ifid->parent_offset, + fh_len > 4 ? ifid->parent_generation : 0); +} -struct export_operations isofs_export_ops = { - .decode_fh = isofs_export_decode_fh, +const struct export_operations isofs_export_ops = { .encode_fh = isofs_export_encode_fh, - .get_dentry = isofs_export_get_dentry, + .fh_to_dentry = isofs_fh_to_dentry, + .fh_to_parent = isofs_fh_to_parent, .get_parent = isofs_export_get_parent, }; diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index a07e67b1ea7f..f3213f9f89af 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -178,4 +178,4 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de, extern const struct inode_operations isofs_dir_inode_operations; extern const struct file_operations isofs_dir_operations; extern const struct address_space_operations isofs_symlink_aops; -extern struct export_operations isofs_export_ops; +extern const struct export_operations isofs_export_ops; diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index f0ec72b263f1..8e2cf2cde185 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -18,6 +18,8 @@ #ifndef _H_JFS_INODE #define _H_JFS_INODE +struct fid; + extern struct inode *ialloc(struct inode *, umode_t); extern int jfs_fsync(struct file *, struct dentry *, int); extern int jfs_ioctl(struct inode *, struct file *, @@ -32,7 +34,10 @@ extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); extern void jfs_get_inode_flags(struct jfs_inode_info *); -extern struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp); +extern struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); +extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); extern void jfs_set_inode_flags(struct inode *); extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 932797ba433b..4e0a8493cef6 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -20,6 +20,7 @@ #include <linux/fs.h> #include <linux/ctype.h> #include <linux/quotaops.h> +#include <linux/exportfs.h> #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_inode.h" @@ -1477,13 +1478,10 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc return dentry; } -struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp) +static struct inode *jfs_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) { - __u32 *objp = vobjp; - unsigned long ino = objp[0]; - __u32 generation = objp[1]; struct inode *inode; - struct dentry *result; if (ino == 0) return ERR_PTR(-ESTALE); @@ -1493,20 +1491,25 @@ struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp) if (is_bad_inode(inode) || (generation && inode->i_generation != generation)) { - result = ERR_PTR(-ESTALE); - goto out_iput; + iput(inode); + return ERR_PTR(-ESTALE); } - result = d_alloc_anon(inode); - if (!result) { - result = ERR_PTR(-ENOMEM); - goto out_iput; - } - return result; + return inode; +} - out_iput: - iput(inode); - return result; +struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + jfs_nfs_get_inode); +} + +struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + jfs_nfs_get_inode); } struct dentry *jfs_get_parent(struct dentry *dentry) diff --git a/fs/jfs/super.c b/fs/jfs/super.c index cff60c171943..314bb4ff1ba8 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -48,7 +48,7 @@ MODULE_LICENSE("GPL"); static struct kmem_cache * jfs_inode_cachep; static const struct super_operations jfs_super_operations; -static struct export_operations jfs_export_operations; +static const struct export_operations jfs_export_operations; static struct file_system_type jfs_fs_type; #define MAX_COMMIT_THREADS 64 @@ -737,8 +737,9 @@ static const struct super_operations jfs_super_operations = { #endif }; -static struct export_operations jfs_export_operations = { - .get_dentry = jfs_get_dentry, +static const struct export_operations jfs_export_operations = { + .fh_to_dentry = jfs_fh_to_dentry, + .fh_to_parent = jfs_fh_to_parent, .get_parent = jfs_get_parent, }; diff --git a/fs/libfs.c b/fs/libfs.c index ae51481e45e5..6e68b700958d 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -8,6 +8,7 @@ #include <linux/mount.h> #include <linux/vfs.h> #include <linux/mutex.h> +#include <linux/exportfs.h> #include <asm/uaccess.h> @@ -678,6 +679,93 @@ out: return ret; } +/* + * This is what d_alloc_anon should have been. Once the exportfs + * argument transition has been finished I will update d_alloc_anon + * to this prototype and this wrapper will go away. --hch + */ +static struct dentry *exportfs_d_alloc(struct inode *inode) +{ + struct dentry *dentry; + + if (!inode) + return NULL; + if (IS_ERR(inode)) + return ERR_PTR(PTR_ERR(inode)); + + dentry = d_alloc_anon(inode); + if (!dentry) { + iput(inode); + dentry = ERR_PTR(-ENOMEM); + } + return dentry; +} + +/** + * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation + * @sb: filesystem to do the file handle conversion on + * @fid: file handle to convert + * @fh_len: length of the file handle in bytes + * @fh_type: type of file handle + * @get_inode: filesystem callback to retrieve inode + * + * This function decodes @fid as long as it has one of the well-known + * Linux filehandle types and calls @get_inode on it to retrieve the + * inode for the object specified in the file handle. + */ +struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type, struct inode *(*get_inode) + (struct super_block *sb, u64 ino, u32 gen)) +{ + struct inode *inode = NULL; + + if (fh_len < 2) + return NULL; + + switch (fh_type) { + case FILEID_INO32_GEN: + case FILEID_INO32_GEN_PARENT: + inode = get_inode(sb, fid->i32.ino, fid->i32.gen); + break; + } + + return exportfs_d_alloc(inode); +} +EXPORT_SYMBOL_GPL(generic_fh_to_dentry); + +/** + * generic_fh_to_dentry - generic helper for the fh_to_parent export operation + * @sb: filesystem to do the file handle conversion on + * @fid: file handle to convert + * @fh_len: length of the file handle in bytes + * @fh_type: type of file handle + * @get_inode: filesystem callback to retrieve inode + * + * This function decodes @fid as long as it has one of the well-known + * Linux filehandle types and calls @get_inode on it to retrieve the + * inode for the _parent_ object specified in the file handle if it + * is specified in the file handle, or NULL otherwise. + */ +struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type, struct inode *(*get_inode) + (struct super_block *sb, u64 ino, u32 gen)) +{ + struct inode *inode = NULL; + + if (fh_len <= 2) + return NULL; + + switch (fh_type) { + case FILEID_INO32_GEN_PARENT: + inode = get_inode(sb, fid->i32.parent_ino, + (fh_len > 3 ? fid->i32.parent_gen : 0)); + break; + } + + return exportfs_d_alloc(inode); +} +EXPORT_SYMBOL_GPL(generic_fh_to_parent); + EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 04b266729802..66d0aeb32a47 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -386,15 +386,13 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid) dprintk("exp_export: export of non-dev fs without fsid\n"); return -EINVAL; } - if (!inode->i_sb->s_export_op) { + + if (!inode->i_sb->s_export_op || + !inode->i_sb->s_export_op->fh_to_dentry) { dprintk("exp_export: export of invalid fs type.\n"); return -EINVAL; } - /* Ok, we can export it */; - if (!inode->i_sb->s_export_op->find_exported_dentry) - inode->i_sb->s_export_op->find_exported_dentry = - find_exported_dentry; return 0; } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index ebd03cc07479..6f03918018a3 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -88,7 +88,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) { struct xdr_netobj cksum; struct hash_desc desc; - struct scatterlist sg[1]; + struct scatterlist sg; __be32 status = nfserr_resource; dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", @@ -102,11 +102,9 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) if (cksum.data == NULL) goto out; - sg[0].page = virt_to_page(clname->data); - sg[0].offset = offset_in_page(clname->data); - sg[0].length = clname->len; + sg_init_one(&sg, clname->data, clname->len); - if (crypto_hash_digest(&desc, sg, sg->length, cksum.data)) + if (crypto_hash_digest(&desc, &sg, sg.length, cksum.data)) goto out; md5_to_hex(dname, cksum.data); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 7011d62acfc8..4f712e970584 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -115,8 +115,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); if (!fhp->fh_dentry) { - __u32 *datap=NULL; - __u32 tfh[3]; /* filehandle fragment for oldstyle filehandles */ + struct fid *fid = NULL, sfid; int fileid_type; int data_left = fh->fh_size/4; @@ -128,7 +127,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) if (fh->fh_version == 1) { int len; - datap = fh->fh_auth; if (--data_left<0) goto out; switch (fh->fh_auth_type) { case 0: break; @@ -144,9 +142,11 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) fh->fh_fsid[1] = fh->fh_fsid[2]; } if ((data_left -= len)<0) goto out; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, datap); - datap += len; + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, + fh->fh_auth); + fid = (struct fid *)(fh->fh_auth + len); } else { + __u32 tfh[2]; dev_t xdev; ino_t xino; if (fh->fh_size != NFS_FHSIZE) @@ -190,22 +190,22 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) error = nfserr_badhandle; if (fh->fh_version != 1) { - tfh[0] = fh->ofh_ino; - tfh[1] = fh->ofh_generation; - tfh[2] = fh->ofh_dirino; - datap = tfh; + sfid.i32.ino = fh->ofh_ino; + sfid.i32.gen = fh->ofh_generation; + sfid.i32.parent_ino = fh->ofh_dirino; + fid = &sfid; data_left = 3; if (fh->ofh_dirino == 0) - fileid_type = 1; + fileid_type = FILEID_INO32_GEN; else - fileid_type = 2; + fileid_type = FILEID_INO32_GEN_PARENT; } else fileid_type = fh->fh_fileid_type; - if (fileid_type == 0) + if (fileid_type == FILEID_ROOT) dentry = dget(exp->ex_dentry); else { - dentry = exportfs_decode_fh(exp->ex_mnt, datap, + dentry = exportfs_decode_fh(exp->ex_mnt, fid, data_left, fileid_type, nfsd_acceptable, exp); } @@ -286,16 +286,21 @@ out: * an inode. In this case a call to fh_update should be made * before the fh goes out on the wire ... */ -static inline int _fh_update(struct dentry *dentry, struct svc_export *exp, - __u32 *datap, int *maxsize) +static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry) { - if (dentry == exp->ex_dentry) { - *maxsize = 0; - return 0; - } + if (dentry != exp->ex_dentry) { + struct fid *fid = (struct fid *) + (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); + int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; + int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK); - return exportfs_encode_fh(dentry, datap, maxsize, - !(exp->ex_flags & NFSEXP_NOSUBTREECHECK)); + fhp->fh_handle.fh_fileid_type = + exportfs_encode_fh(dentry, fid, &maxsize, subtreecheck); + fhp->fh_handle.fh_size += maxsize * 4; + } else { + fhp->fh_handle.fh_fileid_type = FILEID_ROOT; + } } /* @@ -457,12 +462,8 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, datap += len/4; fhp->fh_handle.fh_size = 4 + len; - if (inode) { - int size = (fhp->fh_maxsize-len-4)/4; - fhp->fh_handle.fh_fileid_type = - _fh_update(dentry, exp, datap, &size); - fhp->fh_handle.fh_size += size*4; - } + if (inode) + _fh_update(fhp, exp, dentry); if (fhp->fh_handle.fh_fileid_type == 255) return nfserr_opnotsupp; } @@ -479,7 +480,6 @@ __be32 fh_update(struct svc_fh *fhp) { struct dentry *dentry; - __u32 *datap; if (!fhp->fh_dentry) goto out_bad; @@ -490,15 +490,10 @@ fh_update(struct svc_fh *fhp) if (fhp->fh_handle.fh_version != 1) { _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); } else { - int size; - if (fhp->fh_handle.fh_fileid_type != 0) + if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) goto out; - datap = fhp->fh_handle.fh_auth+ - fhp->fh_handle.fh_size/4 -1; - size = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; - fhp->fh_handle.fh_fileid_type = - _fh_update(dentry, fhp->fh_export, datap, &size); - fhp->fh_handle.fh_size += size*4; + + _fh_update(fhp, fhp->fh_export, dentry); if (fhp->fh_handle.fh_fileid_type == 255) return nfserr_opnotsupp; } diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index e93c6142b23c..e1781c8b1650 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -450,58 +450,40 @@ try_next: return parent_dent; } -/** - * ntfs_get_dentry - find a dentry for the inode from a file handle sub-fragment - * @sb: super block identifying the mounted ntfs volume - * @fh: the file handle sub-fragment - * - * Find a dentry for the inode given a file handle sub-fragment. This function - * is called from fs/exportfs/expfs.c::find_exported_dentry() which in turn is - * called from the default ->decode_fh() which is export_decode_fh() in the - * same file. The code is closely based on the default ->get_dentry() helper - * fs/exportfs/expfs.c::get_object(). - * - * The @fh contains two 32-bit unsigned values, the first one is the inode - * number and the second one is the inode generation. - * - * Return the dentry on success or the error code on error (IS_ERR() is true). - */ -static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) +static struct inode *ntfs_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) { - struct inode *vi; - struct dentry *dent; - unsigned long ino = ((u32 *)fh)[0]; - u32 gen = ((u32 *)fh)[1]; + struct inode *inode; - ntfs_debug("Entering for inode 0x%lx, generation 0x%x.", ino, gen); - vi = ntfs_iget(sb, ino); - if (IS_ERR(vi)) { - ntfs_error(sb, "Failed to get inode 0x%lx.", ino); - return (struct dentry *)vi; - } - if (unlikely(is_bad_inode(vi) || vi->i_generation != gen)) { - /* We didn't find the right inode. */ - ntfs_error(sb, "Inode 0x%lx, bad count: %d %d or version 0x%x " - "0x%x.", vi->i_ino, vi->i_nlink, - atomic_read(&vi->i_count), vi->i_generation, - gen); - iput(vi); - return ERR_PTR(-ESTALE); - } - /* Now find a dentry. If possible, get a well-connected one. */ - dent = d_alloc_anon(vi); - if (unlikely(!dent)) { - iput(vi); - return ERR_PTR(-ENOMEM); + inode = ntfs_iget(sb, ino); + if (!IS_ERR(inode)) { + if (is_bad_inode(inode) || inode->i_generation != generation) { + iput(inode); + inode = ERR_PTR(-ESTALE); + } } - ntfs_debug("Done for inode 0x%lx, generation 0x%x.", ino, gen); - return dent; + + return inode; +} + +static struct dentry *ntfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + ntfs_nfs_get_inode); +} + +static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + ntfs_nfs_get_inode); } /** * Export operations allowing NFS exporting of mounted NTFS partitions. * - * We use the default ->decode_fh() and ->encode_fh() for now. Note that they + * We use the default ->encode_fh() for now. Note that they * use 32 bits to store the inode number which is an unsigned long so on 64-bit * architectures is usually 64 bits so it would all fail horribly on huge * volumes. I guess we need to define our own encode and decode fh functions @@ -517,10 +499,9 @@ static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) * allowing the inode number 0 which is used in NTFS for the system file $MFT * and due to using iget() whereas NTFS needs ntfs_iget(). */ -struct export_operations ntfs_export_ops = { +const struct export_operations ntfs_export_ops = { .get_parent = ntfs_get_parent, /* Find the parent of a given directory. */ - .get_dentry = ntfs_get_dentry, /* Find a dentry for the inode - given a file handle - sub-fragment. */ + .fh_to_dentry = ntfs_fh_to_dentry, + .fh_to_parent = ntfs_fh_to_parent, }; diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h index d73f5a9ac341..d6a340bf80fc 100644 --- a/fs/ntfs/ntfs.h +++ b/fs/ntfs/ntfs.h @@ -69,7 +69,7 @@ extern const struct inode_operations ntfs_dir_inode_ops; extern const struct file_operations ntfs_empty_file_ops; extern const struct inode_operations ntfs_empty_inode_ops; -extern struct export_operations ntfs_export_ops; +extern const struct export_operations ntfs_export_ops; /** * NTFS_SB - return the ntfs volume given a vfs super block diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index c3bbc198f9ce..535bfa9568a4 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -45,9 +45,9 @@ struct ocfs2_inode_handle u32 ih_generation; }; -static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp) +static struct dentry *ocfs2_get_dentry(struct super_block *sb, + struct ocfs2_inode_handle *handle) { - struct ocfs2_inode_handle *handle = vobjp; struct inode *inode; struct dentry *result; @@ -194,54 +194,37 @@ bail: return type; } -static struct dentry *ocfs2_decode_fh(struct super_block *sb, u32 *fh_in, - int fh_len, int fileid_type, - int (*acceptable)(void *context, - struct dentry *de), - void *context) +static struct dentry *ocfs2_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) { - struct ocfs2_inode_handle handle, parent; - struct dentry *ret = NULL; - __le32 *fh = (__force __le32 *) fh_in; - - mlog_entry("(0x%p, 0x%p, %d, %d, 0x%p, 0x%p)\n", - sb, fh, fh_len, fileid_type, acceptable, context); - - if (fh_len < 3 || fileid_type > 2) - goto bail; - - if (fileid_type == 2) { - if (fh_len < 6) - goto bail; - - parent.ih_blkno = (u64)le32_to_cpu(fh[3]) << 32; - parent.ih_blkno |= (u64)le32_to_cpu(fh[4]); - parent.ih_generation = le32_to_cpu(fh[5]); + struct ocfs2_inode_handle handle; - mlog(0, "Decoding parent: blkno: %llu, generation: %u\n", - (unsigned long long)parent.ih_blkno, - parent.ih_generation); - } + if (fh_len < 3 || fh_type > 2) + return NULL; - handle.ih_blkno = (u64)le32_to_cpu(fh[0]) << 32; - handle.ih_blkno |= (u64)le32_to_cpu(fh[1]); - handle.ih_generation = le32_to_cpu(fh[2]); + handle.ih_blkno = (u64)le32_to_cpu(fid->raw[0]) << 32; + handle.ih_blkno |= (u64)le32_to_cpu(fid->raw[1]); + handle.ih_generation = le32_to_cpu(fid->raw[2]); + return ocfs2_get_dentry(sb, &handle); +} - mlog(0, "Encoding fh: blkno: %llu, generation: %u\n", - (unsigned long long)handle.ih_blkno, handle.ih_generation); +static struct dentry *ocfs2_fh_to_parent(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + struct ocfs2_inode_handle parent; - ret = ocfs2_export_ops.find_exported_dentry(sb, &handle, &parent, - acceptable, context); + if (fh_type != 2 || fh_len < 6) + return NULL; -bail: - mlog_exit_ptr(ret); - return ret; + parent.ih_blkno = (u64)le32_to_cpu(fid->raw[3]) << 32; + parent.ih_blkno |= (u64)le32_to_cpu(fid->raw[4]); + parent.ih_generation = le32_to_cpu(fid->raw[5]); + return ocfs2_get_dentry(sb, &parent); } -struct export_operations ocfs2_export_ops = { - .decode_fh = ocfs2_decode_fh, +const struct export_operations ocfs2_export_ops = { .encode_fh = ocfs2_encode_fh, - + .fh_to_dentry = ocfs2_fh_to_dentry, + .fh_to_parent = ocfs2_fh_to_parent, .get_parent = ocfs2_get_parent, - .get_dentry = ocfs2_get_dentry, }; diff --git a/fs/ocfs2/export.h b/fs/ocfs2/export.h index e08bed9e45a0..41a738678c37 100644 --- a/fs/ocfs2/export.h +++ b/fs/ocfs2/export.h @@ -28,6 +28,6 @@ #include <linux/exportfs.h> -extern struct export_operations ocfs2_export_ops; +extern const struct export_operations ocfs2_export_ops; #endif /* OCFS2_EXPORT_H */ diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a991af96f3f0..231fd5ccadc5 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1515,19 +1515,20 @@ struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key) return inode; } -struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) +static struct dentry *reiserfs_get_dentry(struct super_block *sb, + u32 objectid, u32 dir_id, u32 generation) + { - __u32 *data = vobjp; struct cpu_key key; struct dentry *result; struct inode *inode; - key.on_disk_key.k_objectid = data[0]; - key.on_disk_key.k_dir_id = data[1]; + key.on_disk_key.k_objectid = objectid; + key.on_disk_key.k_dir_id = dir_id; reiserfs_write_lock(sb); inode = reiserfs_iget(sb, &key); - if (inode && !IS_ERR(inode) && data[2] != 0 && - data[2] != inode->i_generation) { + if (inode && !IS_ERR(inode) && generation != 0 && + generation != inode->i_generation) { iput(inode); inode = NULL; } @@ -1544,14 +1545,9 @@ struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) return result; } -struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, - int len, int fhtype, - int (*acceptable) (void *contect, - struct dentry * de), - void *context) +struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) { - __u32 obj[3], parent[3]; - /* fhtype happens to reflect the number of u32s encoded. * due to a bug in earlier code, fhtype might indicate there * are more u32s then actually fitted. @@ -1564,32 +1560,28 @@ struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, * 6 - as above plus generation of directory * 6 does not fit in NFSv2 handles */ - if (fhtype > len) { - if (fhtype != 6 || len != 5) + if (fh_type > fh_len) { + if (fh_type != 6 || fh_len != 5) reiserfs_warning(sb, - "nfsd/reiserfs, fhtype=%d, len=%d - odd", - fhtype, len); - fhtype = 5; + "nfsd/reiserfs, fhtype=%d, len=%d - odd", + fh_type, fh_len); + fh_type = 5; } - obj[0] = data[0]; - obj[1] = data[1]; - if (fhtype == 3 || fhtype >= 5) - obj[2] = data[2]; - else - obj[2] = 0; /* generation number */ + return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1], + (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0); +} - if (fhtype >= 4) { - parent[0] = data[fhtype >= 5 ? 3 : 2]; - parent[1] = data[fhtype >= 5 ? 4 : 3]; - if (fhtype == 6) - parent[2] = data[5]; - else - parent[2] = 0; - } - return sb->s_export_op->find_exported_dentry(sb, obj, - fhtype < 4 ? NULL : parent, - acceptable, context); +struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + if (fh_type < 4) + return NULL; + + return reiserfs_get_dentry(sb, + (fh_type >= 5) ? fid->raw[3] : fid->raw[2], + (fh_type >= 5) ? fid->raw[4] : fid->raw[3], + (fh_type == 6) ? fid->raw[5] : 0); } int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 98c3781bc069..5cd85fe5df5d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -661,11 +661,11 @@ static struct quotactl_ops reiserfs_qctl_operations = { }; #endif -static struct export_operations reiserfs_export_ops = { +static const struct export_operations reiserfs_export_ops = { .encode_fh = reiserfs_encode_fh, - .decode_fh = reiserfs_decode_fh, + .fh_to_dentry = reiserfs_fh_to_dentry, + .fh_to_parent = reiserfs_fh_to_parent, .get_parent = reiserfs_get_parent, - .get_dentry = reiserfs_get_dentry, }; /* this struct is used in reiserfs_getopt () for containing the value for those diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 3586c7a28d2c..15bd4948832c 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -33,62 +33,25 @@ static struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, }; /* - * XFS encodes and decodes the fileid portion of NFS filehandles - * itself instead of letting the generic NFS code do it. This - * allows filesystems with 64 bit inode numbers to be exported. - * - * Note that a side effect is that xfs_vget() won't be passed a - * zero inode/generation pair under normal circumstances. As - * however a malicious client could send us such data, the check - * remains in that code. + * Note that we only accept fileids which are long enough rather than allow + * the parent generation number to default to zero. XFS considers zero a + * valid generation number not an invalid/wildcard value. */ - -STATIC struct dentry * -xfs_fs_decode_fh( - struct super_block *sb, - __u32 *fh, - int fh_len, - int fileid_type, - int (*acceptable)( - void *context, - struct dentry *de), - void *context) +static int xfs_fileid_length(int fileid_type) { - xfs_fid_t ifid; - xfs_fid_t pfid; - void *parent = NULL; - int is64 = 0; - __u32 *p = fh; - -#if XFS_BIG_INUMS - is64 = (fileid_type & XFS_FILEID_TYPE_64FLAG); - fileid_type &= ~XFS_FILEID_TYPE_64FLAG; -#endif - - /* - * Note that we only accept fileids which are long enough - * rather than allow the parent generation number to default - * to zero. XFS considers zero a valid generation number not - * an invalid/wildcard value. There's little point printk'ing - * a warning here as we don't have the client information - * which would make such a warning useful. - */ - if (fileid_type > 2 || - fh_len < xfs_fileid_length((fileid_type == 2), is64)) - return NULL; - - p = xfs_fileid_decode_fid2(p, &ifid, is64); - - if (fileid_type == 2) { - p = xfs_fileid_decode_fid2(p, &pfid, is64); - parent = &pfid; + switch (fileid_type) { + case FILEID_INO32_GEN: + return 2; + case FILEID_INO32_GEN_PARENT: + return 4; + case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: + return 3; + case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: + return 6; } - - fh = (__u32 *)&ifid; - return sb->s_export_op->find_exported_dentry(sb, fh, parent, acceptable, context); + return 255; /* invalid */ } - STATIC int xfs_fs_encode_fh( struct dentry *dentry, @@ -96,21 +59,21 @@ xfs_fs_encode_fh( int *max_len, int connectable) { + struct fid *fid = (struct fid *)fh; + struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; struct inode *inode = dentry->d_inode; - int type = 1; - __u32 *p = fh; + int fileid_type; int len; - int is64 = 0; -#if XFS_BIG_INUMS - if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) { - /* filesystem may contain 64bit inode numbers */ - is64 = XFS_FILEID_TYPE_64FLAG; - } -#endif /* Directories don't need their parent encoded, they have ".." */ if (S_ISDIR(inode->i_mode)) - connectable = 0; + fileid_type = FILEID_INO32_GEN; + else + fileid_type = FILEID_INO32_GEN_PARENT; + + /* filesystem may contain 64bit inode numbers */ + if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) + fileid_type |= XFS_FILEID_TYPE_64FLAG; /* * Only encode if there is enough space given. In practice @@ -118,39 +81,118 @@ xfs_fs_encode_fh( * over NFSv2 with the subtree_check export option; the other * seven combinations work. The real answer is "don't use v2". */ - len = xfs_fileid_length(connectable, is64); + len = xfs_fileid_length(fileid_type); if (*max_len < len) return 255; *max_len = len; - p = xfs_fileid_encode_inode(p, inode, is64); - if (connectable) { + switch (fileid_type) { + case FILEID_INO32_GEN_PARENT: spin_lock(&dentry->d_lock); - p = xfs_fileid_encode_inode(p, dentry->d_parent->d_inode, is64); + fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino; + fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; spin_unlock(&dentry->d_lock); - type = 2; + /*FALLTHRU*/ + case FILEID_INO32_GEN: + fid->i32.ino = inode->i_ino; + fid->i32.gen = inode->i_generation; + break; + case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: + spin_lock(&dentry->d_lock); + fid64->parent_ino = dentry->d_parent->d_inode->i_ino; + fid64->parent_gen = dentry->d_parent->d_inode->i_generation; + spin_unlock(&dentry->d_lock); + /*FALLTHRU*/ + case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: + fid64->ino = inode->i_ino; + fid64->gen = inode->i_generation; + break; } - BUG_ON((p - fh) != len); - return type | is64; + + return fileid_type; } -STATIC struct dentry * -xfs_fs_get_dentry( +STATIC struct inode * +xfs_nfs_get_inode( struct super_block *sb, - void *data) -{ + u64 ino, + u32 generation) + { + xfs_fid_t xfid; bhv_vnode_t *vp; - struct inode *inode; - struct dentry *result; int error; - error = xfs_vget(XFS_M(sb), &vp, data); - if (error || vp == NULL) - return ERR_PTR(-ESTALE) ; + xfid.fid_len = sizeof(xfs_fid_t) - sizeof(xfid.fid_len); + xfid.fid_pad = 0; + xfid.fid_ino = ino; + xfid.fid_gen = generation; - inode = vn_to_inode(vp); + error = xfs_vget(XFS_M(sb), &vp, &xfid); + if (error) + return ERR_PTR(-error); + + return vp ? vn_to_inode(vp) : NULL; +} + +STATIC struct dentry * +xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fileid_type) +{ + struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; + struct inode *inode = NULL; + struct dentry *result; + + if (fh_len < xfs_fileid_length(fileid_type)) + return NULL; + + switch (fileid_type) { + case FILEID_INO32_GEN_PARENT: + case FILEID_INO32_GEN: + inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen); + break; + case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: + case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: + inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen); + break; + } + + if (!inode) + return NULL; + if (IS_ERR(inode)) + return ERR_PTR(PTR_ERR(inode)); + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + return result; +} + +STATIC struct dentry * +xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fileid_type) +{ + struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; + struct inode *inode = NULL; + struct dentry *result; + + switch (fileid_type) { + case FILEID_INO32_GEN_PARENT: + inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, + fid->i32.parent_gen); + break; + case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: + inode = xfs_nfs_get_inode(sb, fid64->parent_ino, + fid64->parent_gen); + break; + } + + if (!inode) + return NULL; + if (IS_ERR(inode)) + return ERR_PTR(PTR_ERR(inode)); result = d_alloc_anon(inode); - if (!result) { + if (!result) { iput(inode); return ERR_PTR(-ENOMEM); } @@ -178,9 +220,9 @@ xfs_fs_get_parent( return parent; } -struct export_operations xfs_export_operations = { - .decode_fh = xfs_fs_decode_fh, +const struct export_operations xfs_export_operations = { .encode_fh = xfs_fs_encode_fh, + .fh_to_dentry = xfs_fs_fh_to_dentry, + .fh_to_parent = xfs_fs_fh_to_parent, .get_parent = xfs_fs_get_parent, - .get_dentry = xfs_fs_get_dentry, }; diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h index 2f36071a86f7..3272b6ae7a35 100644 --- a/fs/xfs/linux-2.6/xfs_export.h +++ b/fs/xfs/linux-2.6/xfs_export.h @@ -59,50 +59,14 @@ * a subdirectory) or use the "fsid" export option. */ +struct xfs_fid64 { + u64 ino; + u32 gen; + u64 parent_ino; + u32 parent_gen; +} __attribute__((packed)); + /* This flag goes on the wire. Don't play with it. */ #define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ -/* Calculate the length in u32 units of the fileid data */ -static inline int -xfs_fileid_length(int hasparent, int is64) -{ - return hasparent ? (is64 ? 6 : 4) : (is64 ? 3 : 2); -} - -/* - * Decode encoded inode information (either for the inode itself - * or the parent) into an xfs_fid_t structure. Advances and - * returns the new data pointer - */ -static inline __u32 * -xfs_fileid_decode_fid2(__u32 *p, xfs_fid_t *fid, int is64) -{ - fid->fid_len = sizeof(xfs_fid_t) - sizeof(fid->fid_len); - fid->fid_pad = 0; - fid->fid_ino = *p++; -#if XFS_BIG_INUMS - if (is64) - fid->fid_ino |= (((__u64)(*p++)) << 32); -#endif - fid->fid_gen = *p++; - return p; -} - -/* - * Encode inode information (either for the inode itself or the - * parent) into a fileid buffer. Advances and returns the new - * data pointer. - */ -static inline __u32 * -xfs_fileid_encode_inode(__u32 *p, struct inode *inode, int is64) -{ - *p++ = (__u32)inode->i_ino; -#if XFS_BIG_INUMS - if (is64) - *p++ = (__u32)(inode->i_ino >> 32); -#endif - *p++ = inode->i_generation; - return p; -} - #endif /* __XFS_EXPORT_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index c78c23310fe8..3efcf45b14ab 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -118,7 +118,7 @@ extern int xfs_blkdev_get(struct xfs_mount *, const char *, extern void xfs_blkdev_put(struct block_device *); extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); -extern struct export_operations xfs_export_operations; +extern const struct export_operations xfs_export_operations; #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 4e5d3ca53a8e..a1b1b2ee3e51 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -257,7 +257,8 @@ struct acpi_table_dbgp { struct acpi_table_dmar { struct acpi_table_header header; /* Common ACPI table header */ u8 width; /* Host Address Width */ - u8 reserved[11]; + u8 flags; + u8 reserved[10]; }; /* DMAR subtable header */ @@ -265,8 +266,6 @@ struct acpi_table_dmar { struct acpi_dmar_header { u16 type; u16 length; - u8 flags; - u8 reserved[3]; }; /* Values for subtable type in struct acpi_dmar_header */ @@ -274,13 +273,15 @@ struct acpi_dmar_header { enum acpi_dmar_type { ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, - ACPI_DMAR_TYPE_RESERVED = 2 /* 2 and greater are reserved */ + ACPI_DMAR_TYPE_ATSR = 2, + ACPI_DMAR_TYPE_RESERVED = 3 /* 3 and greater are reserved */ }; struct acpi_dmar_device_scope { u8 entry_type; u8 length; - u8 segment; + u16 reserved; + u8 enumeration_id; u8 bus; }; @@ -290,7 +291,14 @@ enum acpi_dmar_scope_type { ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, - ACPI_DMAR_SCOPE_TYPE_RESERVED = 3 /* 3 and greater are reserved */ + ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, + ACPI_DMAR_SCOPE_TYPE_HPET = 4, + ACPI_DMAR_SCOPE_TYPE_RESERVED = 5 /* 5 and greater are reserved */ +}; + +struct acpi_dmar_pci_path { + u8 dev; + u8 fn; }; /* @@ -301,6 +309,9 @@ enum acpi_dmar_scope_type { struct acpi_dmar_hardware_unit { struct acpi_dmar_header header; + u8 flags; + u8 reserved; + u16 segment; u64 address; /* Register Base Address */ }; @@ -312,7 +323,9 @@ struct acpi_dmar_hardware_unit { struct acpi_dmar_reserved_memory { struct acpi_dmar_header header; - u64 address; /* 4_k aligned base address */ + u16 reserved; + u16 segment; + u64 base_address; /* 4_k aligned base address */ u64 end_address; /* 4_k aligned limit address */ }; diff --git a/include/asm-alpha/scatterlist.h b/include/asm-alpha/scatterlist.h index 917365405e83..440747ca6349 100644 --- a/include/asm-alpha/scatterlist.h +++ b/include/asm-alpha/scatterlist.h @@ -5,7 +5,10 @@ #include <asm/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; unsigned int length; diff --git a/include/asm-arm/scatterlist.h b/include/asm-arm/scatterlist.h index de2f65eb42ed..ca0a37d03400 100644 --- a/include/asm-arm/scatterlist.h +++ b/include/asm-arm/scatterlist.h @@ -5,7 +5,10 @@ #include <asm/types.h> struct scatterlist { - struct page *page; /* buffer page */ +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; /* buffer offset */ dma_addr_t dma_address; /* dma address */ unsigned int length; /* length */ diff --git a/include/asm-avr32/scatterlist.h b/include/asm-avr32/scatterlist.h index c6d5ce3b3a25..377320e3bd17 100644 --- a/include/asm-avr32/scatterlist.h +++ b/include/asm-avr32/scatterlist.h @@ -4,7 +4,10 @@ #include <asm/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; dma_addr_t dma_address; unsigned int length; diff --git a/include/asm-blackfin/scatterlist.h b/include/asm-blackfin/scatterlist.h index 60e07b92044c..32128d53469b 100644 --- a/include/asm-blackfin/scatterlist.h +++ b/include/asm-blackfin/scatterlist.h @@ -4,7 +4,10 @@ #include <linux/mm.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; dma_addr_t dma_address; unsigned int length; diff --git a/include/asm-cris/scatterlist.h b/include/asm-cris/scatterlist.h index 4bdc44c4ac3d..faff53ad1f96 100644 --- a/include/asm-cris/scatterlist.h +++ b/include/asm-cris/scatterlist.h @@ -2,11 +2,14 @@ #define __ASM_CRIS_SCATTERLIST_H struct scatterlist { +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif char * address; /* Location data is to be transferred to */ unsigned int length; /* The following is i386 highmem junk - not used by us */ - struct page * page; /* Location for highmem page, if any */ + unsigned long page_link; unsigned int offset;/* for highmem, page offset */ }; diff --git a/include/asm-frv/scatterlist.h b/include/asm-frv/scatterlist.h index 8e827fa853f1..f7da007b763c 100644 --- a/include/asm-frv/scatterlist.h +++ b/include/asm-frv/scatterlist.h @@ -22,7 +22,10 @@ * and that's it. There's no excuse for not highmem enabling YOUR driver. /jens */ struct scatterlist { - struct page *page; /* Location for highmem page, if any */ +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; /* for highmem, page offset */ dma_addr_t dma_address; diff --git a/include/asm-h8300/scatterlist.h b/include/asm-h8300/scatterlist.h index 985fdf54eaca..d3ecdd87ac90 100644 --- a/include/asm-h8300/scatterlist.h +++ b/include/asm-h8300/scatterlist.h @@ -4,7 +4,10 @@ #include <asm/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; dma_addr_t dma_address; unsigned int length; diff --git a/include/asm-ia64/scatterlist.h b/include/asm-ia64/scatterlist.h index 7d5234d50312..d6f57874041d 100644 --- a/include/asm-ia64/scatterlist.h +++ b/include/asm-ia64/scatterlist.h @@ -9,7 +9,10 @@ #include <asm/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; unsigned int length; /* buffer length */ diff --git a/include/asm-m32r/scatterlist.h b/include/asm-m32r/scatterlist.h index 352415ff5eb9..1ed372c73d0b 100644 --- a/include/asm-m32r/scatterlist.h +++ b/include/asm-m32r/scatterlist.h @@ -4,9 +4,12 @@ #include <asm/types.h> struct scatterlist { +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif char * address; /* Location data is to be transferred to, NULL for * highmem page */ - struct page * page; /* Location for highmem page, if any */ + unsigned long page_link; unsigned int offset;/* for highmem, page offset */ dma_addr_t dma_address; diff --git a/include/asm-m68k/scatterlist.h b/include/asm-m68k/scatterlist.h index 24887a2d9c7b..d3a7a0edfeca 100644 --- a/include/asm-m68k/scatterlist.h +++ b/include/asm-m68k/scatterlist.h @@ -4,7 +4,10 @@ #include <linux/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; unsigned int length; diff --git a/include/asm-m68knommu/scatterlist.h b/include/asm-m68knommu/scatterlist.h index 4da79d3d3f34..10942840e88f 100644 --- a/include/asm-m68knommu/scatterlist.h +++ b/include/asm-m68knommu/scatterlist.h @@ -5,7 +5,10 @@ #include <asm/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; dma_addr_t dma_address; unsigned int length; diff --git a/include/asm-mips/scatterlist.h b/include/asm-mips/scatterlist.h index 7af104c95b20..83d69fe17c9f 100644 --- a/include/asm-mips/scatterlist.h +++ b/include/asm-mips/scatterlist.h @@ -4,7 +4,10 @@ #include <asm/types.h> struct scatterlist { - struct page * page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; dma_addr_t dma_address; unsigned int length; diff --git a/include/asm-parisc/scatterlist.h b/include/asm-parisc/scatterlist.h index e7211c748446..cd3cfdf82289 100644 --- a/include/asm-parisc/scatterlist.h +++ b/include/asm-parisc/scatterlist.h @@ -5,7 +5,10 @@ #include <asm/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; unsigned int length; diff --git a/include/asm-powerpc/scatterlist.h b/include/asm-powerpc/scatterlist.h index b075f619c3b7..fcf7d55afe45 100644 --- a/include/asm-powerpc/scatterlist.h +++ b/include/asm-powerpc/scatterlist.h @@ -14,7 +14,10 @@ #include <asm/dma.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; unsigned int length; diff --git a/include/asm-ppc/system.h b/include/asm-ppc/system.h index cc45780421ca..51df94c73846 100644 --- a/include/asm-ppc/system.h +++ b/include/asm-ppc/system.h @@ -33,6 +33,7 @@ #define set_mb(var, value) do { var = value; mb(); } while (0) +#define AT_VECTOR_SIZE_ARCH 6 /* entries in ARCH_DLINFO */ #ifdef CONFIG_SMP #define smp_mb() mb() #define smp_rmb() rmb() diff --git a/include/asm-s390/scatterlist.h b/include/asm-s390/scatterlist.h index a43b3afc5e2d..29ec8e28c8df 100644 --- a/include/asm-s390/scatterlist.h +++ b/include/asm-s390/scatterlist.h @@ -2,7 +2,10 @@ #define _ASMS390_SCATTERLIST_H struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; unsigned int length; }; diff --git a/include/asm-sh/scatterlist.h b/include/asm-sh/scatterlist.h index b9ae53c38365..a7d0d1856a99 100644 --- a/include/asm-sh/scatterlist.h +++ b/include/asm-sh/scatterlist.h @@ -4,7 +4,10 @@ #include <asm/types.h> struct scatterlist { - struct page * page; /* Location for highmem page, if any */ +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset;/* for highmem, page offset */ dma_addr_t dma_address; unsigned int length; diff --git a/include/asm-sh64/scatterlist.h b/include/asm-sh64/scatterlist.h index 1c723f2d7a95..5109251970e7 100644 --- a/include/asm-sh64/scatterlist.h +++ b/include/asm-sh64/scatterlist.h @@ -14,7 +14,10 @@ #include <asm/types.h> struct scatterlist { - struct page * page; /* Location for highmem page, if any */ +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset;/* for highmem, page offset */ dma_addr_t dma_address; unsigned int length; diff --git a/include/asm-sparc/scatterlist.h b/include/asm-sparc/scatterlist.h index 4055af90ad7e..e08d3d775b08 100644 --- a/include/asm-sparc/scatterlist.h +++ b/include/asm-sparc/scatterlist.h @@ -5,7 +5,10 @@ #include <linux/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; unsigned int length; diff --git a/include/asm-sparc64/scatterlist.h b/include/asm-sparc64/scatterlist.h index 703c5bbe6c8c..6df23f070b1a 100644 --- a/include/asm-sparc64/scatterlist.h +++ b/include/asm-sparc64/scatterlist.h @@ -6,7 +6,10 @@ #include <asm/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; unsigned int length; diff --git a/include/asm-v850/scatterlist.h b/include/asm-v850/scatterlist.h index 56f402920db9..02d27b3fb061 100644 --- a/include/asm-v850/scatterlist.h +++ b/include/asm-v850/scatterlist.h @@ -17,7 +17,10 @@ #include <asm/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned offset; dma_addr_t dma_address; unsigned length; diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h index ef67b59dbdb9..dc031cf44633 100644 --- a/include/asm-x86/bootparam.h +++ b/include/asm-x86/bootparam.h @@ -28,8 +28,9 @@ struct setup_header { u16 kernel_version; u8 type_of_loader; u8 loadflags; -#define LOADED_HIGH 0x01 -#define CAN_USE_HEAP 0x80 +#define LOADED_HIGH (1<<0) +#define KEEP_SEGMENTS (1<<6) +#define CAN_USE_HEAP (1<<7) u16 setup_move_size; u32 code32_start; u32 ramdisk_image; @@ -41,6 +42,10 @@ struct setup_header { u32 initrd_addr_max; u32 kernel_alignment; u8 relocatable_kernel; + u8 _pad2[3]; + u32 cmdline_size; + u32 hardware_subarch; + u64 hardware_subarch_data; } __attribute__((packed)); struct sys_desc_table { diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index b3d43de44c59..9411a2d3f19c 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -27,6 +27,7 @@ void global_flush_tlb(void); int change_page_attr(struct page *page, int numpages, pgprot_t prot); int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot); +void clflush_cache_range(void *addr, int size); #ifdef CONFIG_DEBUG_PAGEALLOC /* internal debugging function */ diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h index d9ee5e52e91b..87a715367a1b 100644 --- a/include/asm-x86/device.h +++ b/include/asm-x86/device.h @@ -5,6 +5,9 @@ struct dev_archdata { #ifdef CONFIG_ACPI void *acpi_handle; #endif +#ifdef CONFIG_DMAR + void *iommu; /* hook for IOMMU specific extension */ +#endif }; #endif /* _ASM_X86_DEVICE_H */ diff --git a/include/asm-x86/dma-mapping_32.h b/include/asm-x86/dma-mapping_32.h index 6a2d26cb5da6..55f01bd9e556 100644 --- a/include/asm-x86/dma-mapping_32.h +++ b/include/asm-x86/dma-mapping_32.h @@ -45,9 +45,9 @@ dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, WARN_ON(nents == 0 || sglist[0].length == 0); for_each_sg(sglist, sg, nents, i) { - BUG_ON(!sg->page); + BUG_ON(!sg_page(sg)); - sg->dma_address = page_to_phys(sg->page) + sg->offset; + sg->dma_address = sg_phys(sg); } flush_write_buffers(); diff --git a/include/asm-x86/scatterlist_32.h b/include/asm-x86/scatterlist_32.h index bd5164aa8f63..0e7d997a34be 100644 --- a/include/asm-x86/scatterlist_32.h +++ b/include/asm-x86/scatterlist_32.h @@ -4,7 +4,10 @@ #include <asm/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; dma_addr_t dma_address; unsigned int length; diff --git a/include/asm-x86/scatterlist_64.h b/include/asm-x86/scatterlist_64.h index ef3986ba4b79..1847c72befeb 100644 --- a/include/asm-x86/scatterlist_64.h +++ b/include/asm-x86/scatterlist_64.h @@ -4,7 +4,10 @@ #include <asm/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; unsigned int length; dma_addr_t dma_address; diff --git a/include/asm-xtensa/scatterlist.h b/include/asm-xtensa/scatterlist.h index ca337a294290..810080bb0a2b 100644 --- a/include/asm-xtensa/scatterlist.h +++ b/include/asm-xtensa/scatterlist.h @@ -14,7 +14,10 @@ #include <asm/types.h> struct scatterlist { - struct page *page; +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; unsigned int offset; dma_addr_t dma_address; unsigned int length; diff --git a/include/linux/capability.h b/include/linux/capability.h index 7a8d7ade28a0..bb017edffd56 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -56,10 +56,8 @@ typedef struct __user_cap_data_struct { struct vfs_cap_data { __u32 magic_etc; /* Little endian */ - struct { - __u32 permitted; /* Little endian */ - __u32 inheritable; /* Little endian */ - } data[1]; + __u32 permitted; /* Little endian */ + __u32 inheritable; /* Little endian */ }; #ifdef __KERNEL__ diff --git a/include/linux/dmar.h b/include/linux/dmar.h new file mode 100644 index 000000000000..ffb6439cb5e6 --- /dev/null +++ b/include/linux/dmar.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) Ashok Raj <ashok.raj@intel.com> + * Copyright (C) Shaohua Li <shaohua.li@intel.com> + */ + +#ifndef __DMAR_H__ +#define __DMAR_H__ + +#include <linux/acpi.h> +#include <linux/types.h> +#include <linux/msi.h> + +#ifdef CONFIG_DMAR +struct intel_iommu; + +extern char *dmar_get_fault_reason(u8 fault_reason); + +/* Can't use the common MSI interrupt functions + * since DMAR is not a pci device + */ +extern void dmar_msi_unmask(unsigned int irq); +extern void dmar_msi_mask(unsigned int irq); +extern void dmar_msi_read(int irq, struct msi_msg *msg); +extern void dmar_msi_write(int irq, struct msi_msg *msg); +extern int dmar_set_interrupt(struct intel_iommu *iommu); +extern int arch_setup_dmar_msi(unsigned int irq); + +/* Intel IOMMU detection and initialization functions */ +extern void detect_intel_iommu(void); +extern int intel_iommu_init(void); + +extern int dmar_table_init(void); +extern int early_dmar_detect(void); + +extern struct list_head dmar_drhd_units; +extern struct list_head dmar_rmrr_units; + +struct dmar_drhd_unit { + struct list_head list; /* list of drhd units */ + u64 reg_base_addr; /* register base address*/ + struct pci_dev **devices; /* target device array */ + int devices_cnt; /* target device count */ + u8 ignored:1; /* ignore drhd */ + u8 include_all:1; + struct intel_iommu *iommu; +}; + +struct dmar_rmrr_unit { + struct list_head list; /* list of rmrr units */ + u64 base_address; /* reserved base address*/ + u64 end_address; /* reserved end address */ + struct pci_dev **devices; /* target devices */ + int devices_cnt; /* target device count */ +}; + +#define for_each_drhd_unit(drhd) \ + list_for_each_entry(drhd, &dmar_drhd_units, list) +#define for_each_rmrr_units(rmrr) \ + list_for_each_entry(rmrr, &dmar_rmrr_units, list) +#else +static inline void detect_intel_iommu(void) +{ + return; +} +static inline int intel_iommu_init(void) +{ + return -ENODEV; +} + +#endif /* !CONFIG_DMAR */ +#endif /* __DMAR_H__ */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 0b9579a4cd42..14813b595802 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -298,7 +298,7 @@ extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, u64 attr); extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, - struct resource *data_resource); + struct resource *data_resource, struct resource *bss_resource); extern unsigned long efi_get_time(void); extern int efi_set_rtc_mmss(unsigned long nowtime); extern int is_available_memory(efi_memory_desc_t * md); diff --git a/include/linux/efs_fs.h b/include/linux/efs_fs.h index 16cb25cbf7c5..dd57fe523e97 100644 --- a/include/linux/efs_fs.h +++ b/include/linux/efs_fs.h @@ -35,6 +35,7 @@ static inline struct efs_sb_info *SUPER_INFO(struct super_block *sb) } struct statfs; +struct fid; extern const struct inode_operations efs_dir_inode_operations; extern const struct file_operations efs_dir_operations; @@ -45,7 +46,10 @@ extern efs_block_t efs_map_block(struct inode *, efs_block_t); extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int); extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *); -extern struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp); +extern struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); +extern struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); extern struct dentry *efs_get_parent(struct dentry *); extern int efs_bmap(struct inode *, int); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 8872fe8392d6..51d214138814 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -4,9 +4,48 @@ #include <linux/types.h> struct dentry; +struct inode; struct super_block; struct vfsmount; +/* + * The fileid_type identifies how the file within the filesystem is encoded. + * In theory this is freely set and parsed by the filesystem, but we try to + * stick to conventions so we can share some generic code and don't confuse + * sniffers like ethereal/wireshark. + * + * The filesystem must not use the value '0' or '0xff'. + */ +enum fid_type { + /* + * The root, or export point, of the filesystem. + * (Never actually passed down to the filesystem. + */ + FILEID_ROOT = 0, + + /* + * 32bit inode number, 32 bit generation number. + */ + FILEID_INO32_GEN = 1, + + /* + * 32bit inode number, 32 bit generation number, + * 32 bit parent directory inode number. + */ + FILEID_INO32_GEN_PARENT = 2, +}; + +struct fid { + union { + struct { + u32 ino; + u32 gen; + u32 parent_ino; + u32 parent_gen; + } i32; + __u32 raw[6]; + }; +}; /** * struct export_operations - for nfsd to communicate with file systems @@ -15,43 +54,9 @@ struct vfsmount; * @get_name: find the name for a given inode in a given directory * @get_parent: find the parent of a given directory * @get_dentry: find a dentry for the inode given a file handle sub-fragment - * @find_exported_dentry: - * set by the exporting module to a standard helper function. - * - * Description: - * The export_operations structure provides a means for nfsd to communicate - * with a particular exported file system - particularly enabling nfsd and - * the filesystem to co-operate when dealing with file handles. - * - * export_operations contains two basic operation for dealing with file - * handles, decode_fh() and encode_fh(), and allows for some other - * operations to be defined which standard helper routines use to get - * specific information from the filesystem. - * - * nfsd encodes information use to determine which filesystem a filehandle - * applies to in the initial part of the file handle. The remainder, termed - * a file handle fragment, is controlled completely by the filesystem. The - * standard helper routines assume that this fragment will contain one or - * two sub-fragments, one which identifies the file, and one which may be - * used to identify the (a) directory containing the file. * - * In some situations, nfsd needs to get a dentry which is connected into a - * specific part of the file tree. To allow for this, it passes the - * function acceptable() together with a @context which can be used to see - * if the dentry is acceptable. As there can be multiple dentrys for a - * given file, the filesystem should check each one for acceptability before - * looking for the next. As soon as an acceptable one is found, it should - * be returned. - * - * decode_fh: - * @decode_fh is given a &struct super_block (@sb), a file handle fragment - * (@fh, @fh_len) and an acceptability testing function (@acceptable, - * @context). It should return a &struct dentry which refers to the same - * file that the file handle fragment refers to, and which passes the - * acceptability test. If it cannot, it should return a %NULL pointer if - * the file was found but no acceptable &dentries were available, or a - * %ERR_PTR error code indicating why it couldn't be found (e.g. %ENOENT or - * %ENOMEM). + * See Documentation/filesystems/Exporting for details on how to use + * this interface correctly. * * encode_fh: * @encode_fh should store in the file handle fragment @fh (using at most @@ -63,6 +68,21 @@ struct vfsmount; * the filehandle fragment. encode_fh() should return the number of bytes * stored or a negative error code such as %-ENOSPC * + * fh_to_dentry: + * @fh_to_dentry is given a &struct super_block (@sb) and a file handle + * fragment (@fh, @fh_len). It should return a &struct dentry which refers + * to the same file that the file handle fragment refers to. If it cannot, + * it should return a %NULL pointer if the file was found but no acceptable + * &dentries were available, or an %ERR_PTR error code indicating why it + * couldn't be found (e.g. %ENOENT or %ENOMEM). Any suitable dentry can be + * returned including, if necessary, a new dentry created with d_alloc_root. + * The caller can then find any other extant dentries by following the + * d_alias links. + * + * fh_to_parent: + * Same as @fh_to_dentry, except that it returns a pointer to the parent + * dentry if it was encoded into the filehandle fragment by @encode_fh. + * * get_name: * @get_name should find a name for the given @child in the given @parent * directory. The name should be stored in the @name (with the @@ -75,52 +95,37 @@ struct vfsmount; * is also a directory. In the event that it cannot be found, or storage * space cannot be allocated, a %ERR_PTR should be returned. * - * get_dentry: - * Given a &super_block (@sb) and a pointer to a file-system specific inode - * identifier, possibly an inode number, (@inump) get_dentry() should find - * the identified inode and return a dentry for that inode. Any suitable - * dentry can be returned including, if necessary, a new dentry created with - * d_alloc_root. The caller can then find any other extant dentrys by - * following the d_alias links. If a new dentry was created using - * d_alloc_root, DCACHE_NFSD_DISCONNECTED should be set, and the dentry - * should be d_rehash()ed. - * - * If the inode cannot be found, either a %NULL pointer or an %ERR_PTR code - * can be returned. The @inump will be whatever was passed to - * nfsd_find_fh_dentry() in either the @obj or @parent parameters. - * * Locking rules: * get_parent is called with child->d_inode->i_mutex down * get_name is not (which is possibly inconsistent) */ struct export_operations { - struct dentry *(*decode_fh)(struct super_block *sb, __u32 *fh, - int fh_len, int fh_type, - int (*acceptable)(void *context, struct dentry *de), - void *context); int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, int connectable); + struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); + struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); int (*get_name)(struct dentry *parent, char *name, struct dentry *child); struct dentry * (*get_parent)(struct dentry *child); - struct dentry * (*get_dentry)(struct super_block *sb, void *inump); - - /* This is set by the exporting module to a standard helper */ - struct dentry * (*find_exported_dentry)( - struct super_block *sb, void *obj, void *parent, - int (*acceptable)(void *context, struct dentry *de), - void *context); }; -extern struct dentry *find_exported_dentry(struct super_block *sb, void *obj, - void *parent, int (*acceptable)(void *context, struct dentry *de), - void *context); - -extern int exportfs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, - int connectable); -extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, __u32 *fh, +extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, + int *max_len, int connectable); +extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), void *context); +/* + * Generic helpers for filesystems. + */ +extern struct dentry *generic_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type, + struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); +extern struct dentry *generic_fh_to_parent(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type, + struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); + #endif /* LINUX_EXPORTFS_H */ diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index c77c3bbfe4bb..0f6c86c634fd 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -561,6 +561,7 @@ enum { #define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1) #define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \ ~EXT2_DIR_ROUND) +#define EXT2_MAX_REC_LEN ((1<<16)-1) static inline ext2_fsblk_t ext2_group_first_block_no(struct super_block *sb, unsigned long group_no) diff --git a/include/linux/fs.h b/include/linux/fs.h index 50078bb30a1c..b3ec4a496d64 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -987,7 +987,7 @@ struct super_block { const struct super_operations *s_op; struct dquot_operations *dq_op; struct quotactl_ops *s_qcop; - struct export_operations *s_export_op; + const struct export_operations *s_export_op; unsigned long s_flags; unsigned long s_magic; struct dentry *s_root; diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 6c9873f88287..ff203dd02919 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -34,6 +34,12 @@ name: #endif +#ifndef WEAK +#define WEAK(name) \ + .weak name; \ + name: +#endif + #define KPROBE_ENTRY(name) \ .pushsection .kprobes.text, "ax"; \ ENTRY(name) diff --git a/include/linux/memory.h b/include/linux/memory.h index 654ef5544878..33f0ff0cf634 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -41,18 +41,15 @@ struct memory_block { #define MEM_ONLINE (1<<0) /* exposed to userspace */ #define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */ #define MEM_OFFLINE (1<<2) /* exposed to userspace */ +#define MEM_GOING_ONLINE (1<<3) +#define MEM_CANCEL_ONLINE (1<<4) +#define MEM_CANCEL_OFFLINE (1<<5) -/* - * All of these states are currently kernel-internal for notifying - * kernel components and architectures. - * - * For MEM_MAPPING_INVALID, all notifier chains with priority >0 - * are called before pfn_to_page() becomes invalid. The priority=0 - * entry is reserved for the function that actually makes - * pfn_to_page() stop working. Any notifiers that want to be called - * after that should have priority <0. - */ -#define MEM_MAPPING_INVALID (1<<3) +struct memory_notify { + unsigned long start_pfn; + unsigned long nr_pages; + int status_change_nid; +}; struct notifier_block; struct mem_section; @@ -69,21 +66,31 @@ static inline int register_memory_notifier(struct notifier_block *nb) static inline void unregister_memory_notifier(struct notifier_block *nb) { } +static inline int memory_notify(unsigned long val, void *v) +{ + return 0; +} #else +extern int register_memory_notifier(struct notifier_block *nb); +extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_new_memory(struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); - +extern int memory_notify(unsigned long val, void *v); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ +#ifdef CONFIG_MEMORY_HOTPLUG #define hotplug_memory_notifier(fn, pri) { \ static struct notifier_block fn##_mem_nb = \ { .notifier_call = fn, .priority = pri }; \ register_memory_notifier(&fn##_mem_nb); \ } +#else +#define hotplug_memory_notifier(fn, pri) do { } while (0) +#endif #endif /* _LINUX_MEMORY_H_ */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 768b93359f90..5d2281f661f7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -141,6 +141,7 @@ struct pci_dev { unsigned int class; /* 3 bytes: (base,sub,prog-if) */ u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ + u8 pcie_type; /* PCI-E device/port type */ u8 rom_base_reg; /* which config register controls the ROM */ u8 pin; /* which interrupt pin this device uses */ @@ -183,6 +184,7 @@ struct pci_dev { unsigned int msi_enabled:1; unsigned int msix_enabled:1; unsigned int is_managed:1; + unsigned int is_pcie:1; atomic_t enable_cnt; /* pci_enable_device has been called */ u32 saved_config_space[16]; /* config space saved at suspend time */ diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 72bfccd3da22..422eab4958a6 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -28,6 +28,8 @@ #include <linux/reiserfs_fs_sb.h> #endif +struct fid; + /* * include/linux/reiser_fs.h * @@ -1877,12 +1879,10 @@ void reiserfs_delete_inode(struct inode *inode); int reiserfs_write_inode(struct inode *inode, int); int reiserfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create); -struct dentry *reiserfs_get_dentry(struct super_block *, void *); -struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, - int len, int fhtype, - int (*acceptable) (void *contect, - struct dentry * de), - void *context); +struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); +struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, int connectable); diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 2dc7464cce52..42daf5e15265 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -4,47 +4,95 @@ #include <asm/scatterlist.h> #include <linux/mm.h> #include <linux/string.h> +#include <asm/io.h> +/* + * Notes on SG table design. + * + * Architectures must provide an unsigned long page_link field in the + * scatterlist struct. We use that to place the page pointer AND encode + * information about the sg table as well. The two lower bits are reserved + * for this information. + * + * If bit 0 is set, then the page_link contains a pointer to the next sg + * table list. Otherwise the next entry is at sg + 1. + * + * If bit 1 is set, then this sg entry is the last element in a list. + * + * See sg_next(). + * + */ + +#define SG_MAGIC 0x87654321 + +/** + * sg_set_page - Set sg entry to point at given page + * @sg: SG entry + * @page: The page + * + * Description: + * Use this function to set an sg entry pointing at a page, never assign + * the page directly. We encode sg table information in the lower bits + * of the page pointer. See sg_page() for looking up the page belonging + * to an sg entry. + * + **/ +static inline void sg_set_page(struct scatterlist *sg, struct page *page) +{ + unsigned long page_link = sg->page_link & 0x3; + +#ifdef CONFIG_DEBUG_SG + BUG_ON(sg->sg_magic != SG_MAGIC); +#endif + sg->page_link = page_link | (unsigned long) page; +} + +#define sg_page(sg) ((struct page *) ((sg)->page_link & ~0x3)) + +/** + * sg_set_buf - Set sg entry to point at given data + * @sg: SG entry + * @buf: Data + * @buflen: Data length + * + **/ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, unsigned int buflen) { - sg->page = virt_to_page(buf); + sg_set_page(sg, virt_to_page(buf)); sg->offset = offset_in_page(buf); sg->length = buflen; } -static inline void sg_init_one(struct scatterlist *sg, const void *buf, - unsigned int buflen) -{ - memset(sg, 0, sizeof(*sg)); - sg_set_buf(sg, buf, buflen); -} - /* * We overload the LSB of the page pointer to indicate whether it's * a valid sg entry, or whether it points to the start of a new scatterlist. * Those low bits are there for everyone! (thanks mason :-) */ -#define sg_is_chain(sg) ((unsigned long) (sg)->page & 0x01) +#define sg_is_chain(sg) ((sg)->page_link & 0x01) +#define sg_is_last(sg) ((sg)->page_link & 0x02) #define sg_chain_ptr(sg) \ - ((struct scatterlist *) ((unsigned long) (sg)->page & ~0x01)) + ((struct scatterlist *) ((sg)->page_link & ~0x03)) /** * sg_next - return the next scatterlist entry in a list * @sg: The current sg entry * - * Usually the next entry will be @sg@ + 1, but if this sg element is part - * of a chained scatterlist, it could jump to the start of a new - * scatterlist array. - * - * Note that the caller must ensure that there are further entries after - * the current entry, this function will NOT return NULL for an end-of-list. + * Description: + * Usually the next entry will be @sg@ + 1, but if this sg element is part + * of a chained scatterlist, it could jump to the start of a new + * scatterlist array. * - */ + **/ static inline struct scatterlist *sg_next(struct scatterlist *sg) { - sg++; +#ifdef CONFIG_DEBUG_SG + BUG_ON(sg->sg_magic != SG_MAGIC); +#endif + if (sg_is_last(sg)) + return NULL; + sg++; if (unlikely(sg_is_chain(sg))) sg = sg_chain_ptr(sg); @@ -62,14 +110,15 @@ static inline struct scatterlist *sg_next(struct scatterlist *sg) * @sgl: First entry in the scatterlist * @nents: Number of entries in the scatterlist * - * Should only be used casually, it (currently) scan the entire list - * to get the last entry. + * Description: + * Should only be used casually, it (currently) scan the entire list + * to get the last entry. * - * Note that the @sgl@ pointer passed in need not be the first one, - * the important bit is that @nents@ denotes the number of entries that - * exist from @sgl@. + * Note that the @sgl@ pointer passed in need not be the first one, + * the important bit is that @nents@ denotes the number of entries that + * exist from @sgl@. * - */ + **/ static inline struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) { @@ -83,6 +132,10 @@ static inline struct scatterlist *sg_last(struct scatterlist *sgl, ret = sg; #endif +#ifdef CONFIG_DEBUG_SG + BUG_ON(sgl[0].sg_magic != SG_MAGIC); + BUG_ON(!sg_is_last(ret)); +#endif return ret; } @@ -92,16 +145,111 @@ static inline struct scatterlist *sg_last(struct scatterlist *sgl, * @prv_nents: Number of entries in prv * @sgl: Second scatterlist * - * Links @prv@ and @sgl@ together, to form a longer scatterlist. + * Description: + * Links @prv@ and @sgl@ together, to form a longer scatterlist. * - */ + **/ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, struct scatterlist *sgl) { #ifndef ARCH_HAS_SG_CHAIN BUG(); #endif - prv[prv_nents - 1].page = (struct page *) ((unsigned long) sgl | 0x01); + prv[prv_nents - 1].page_link = (unsigned long) sgl | 0x01; +} + +/** + * sg_mark_end - Mark the end of the scatterlist + * @sgl: Scatterlist + * @nents: Number of entries in sgl + * + * Description: + * Marks the last entry as the termination point for sg_next() + * + **/ +static inline void sg_mark_end(struct scatterlist *sgl, unsigned int nents) +{ + sgl[nents - 1].page_link = 0x02; +} + +static inline void __sg_mark_end(struct scatterlist *sg) +{ + sg->page_link |= 0x02; +} + +/** + * sg_init_one - Initialize a single entry sg list + * @sg: SG entry + * @buf: Virtual address for IO + * @buflen: IO length + * + * Notes: + * This should not be used on a single entry that is part of a larger + * table. Use sg_init_table() for that. + * + **/ +static inline void sg_init_one(struct scatterlist *sg, const void *buf, + unsigned int buflen) +{ + memset(sg, 0, sizeof(*sg)); +#ifdef CONFIG_DEBUG_SG + sg->sg_magic = SG_MAGIC; +#endif + sg_mark_end(sg, 1); + sg_set_buf(sg, buf, buflen); +} + +/** + * sg_init_table - Initialize SG table + * @sgl: The SG table + * @nents: Number of entries in table + * + * Notes: + * If this is part of a chained sg table, sg_mark_end() should be + * used only on the last table part. + * + **/ +static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents) +{ + memset(sgl, 0, sizeof(*sgl) * nents); + sg_mark_end(sgl, nents); +#ifdef CONFIG_DEBUG_SG + { + int i; + for (i = 0; i < nents; i++) + sgl[i].sg_magic = SG_MAGIC; + } +#endif +} + +/** + * sg_phys - Return physical address of an sg entry + * @sg: SG entry + * + * Description: + * This calls page_to_phys() on the page in this sg entry, and adds the + * sg offset. The caller must know that it is legal to call page_to_phys() + * on the sg page. + * + **/ +static inline unsigned long sg_phys(struct scatterlist *sg) +{ + return page_to_phys(sg_page(sg)) + sg->offset; +} + +/** + * sg_virt - Return virtual address of an sg entry + * @sg: SG entry + * + * Description: + * This calls page_address() on the page in this sg entry, and adds the + * sg offset. The caller must know that the sg page has a valid virtual + * mapping. + * + **/ +static inline void *sg_virt(struct scatterlist *sg) +{ + return page_address(sg_page(sg)) + sg->offset; } #endif /* _LINUX_SCATTERLIST_H */ diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 3c9ef5a7d575..ed6fe51df77a 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -731,7 +731,7 @@ static struct trans_ctl_table trans_net_table[] = { { NET_UNIX, "unix", trans_net_unix_table }, { NET_IPV4, "ipv4", trans_net_ipv4_table }, { NET_IPX, "ipx", trans_net_ipx_table }, - { NET_ATALK, "atalk", trans_net_atalk_table }, + { NET_ATALK, "appletalk", trans_net_atalk_table }, { NET_NETROM, "netrom", trans_net_netrom_table }, { NET_AX25, "ax25", trans_net_ax25_table }, { NET_BRIDGE, "bridge", trans_net_bridge_table }, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c567f219191d..1faa5087dc86 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -389,6 +389,16 @@ config DEBUG_LIST If unsure, say N. +config DEBUG_SG + bool "Debug SG table operations" + depends on DEBUG_KERNEL + help + Enable this to turn on checks on scatter-gather tables. This can + help find problems with drivers that do not properly initialize + their sg tables. + + If unsure, say N. + config FRAME_POINTER bool "Compile the kernel with frame pointers" depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || AVR32 || SUPERH || BFIN) diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 752fd95323f3..1a8050ade861 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -35,7 +35,7 @@ #define OFFSET(val,align) ((unsigned long) \ ( (val) & ( (align) - 1))) -#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) +#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) #define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg)) /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1833879f8438..3a47871a29d9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -187,7 +187,24 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) unsigned long onlined_pages = 0; struct zone *zone; int need_zonelists_rebuild = 0; + int nid; + int ret; + struct memory_notify arg; + + arg.start_pfn = pfn; + arg.nr_pages = nr_pages; + arg.status_change_nid = -1; + + nid = page_to_nid(pfn_to_page(pfn)); + if (node_present_pages(nid) == 0) + arg.status_change_nid = nid; + ret = memory_notify(MEM_GOING_ONLINE, &arg); + ret = notifier_to_errno(ret); + if (ret) { + memory_notify(MEM_CANCEL_ONLINE, &arg); + return ret; + } /* * This doesn't need a lock to do pfn_to_page(). * The section can't be removed here because of the @@ -222,6 +239,10 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) build_all_zonelists(); vm_total_pages = nr_free_pagecache_pages(); writeback_set_ratelimit(); + + if (onlined_pages) + memory_notify(MEM_ONLINE, &arg); + return 0; } #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ @@ -467,8 +488,9 @@ int offline_pages(unsigned long start_pfn, { unsigned long pfn, nr_pages, expire; long offlined_pages; - int ret, drain, retry_max; + int ret, drain, retry_max, node; struct zone *zone; + struct memory_notify arg; BUG_ON(start_pfn >= end_pfn); /* at least, alignment against pageblock is necessary */ @@ -480,11 +502,27 @@ int offline_pages(unsigned long start_pfn, we assume this for now. .*/ if (!test_pages_in_a_zone(start_pfn, end_pfn)) return -EINVAL; + + zone = page_zone(pfn_to_page(start_pfn)); + node = zone_to_nid(zone); + nr_pages = end_pfn - start_pfn; + /* set above range as isolated */ ret = start_isolate_page_range(start_pfn, end_pfn); if (ret) return ret; - nr_pages = end_pfn - start_pfn; + + arg.start_pfn = start_pfn; + arg.nr_pages = nr_pages; + arg.status_change_nid = -1; + if (nr_pages >= node_present_pages(node)) + arg.status_change_nid = node; + + ret = memory_notify(MEM_GOING_OFFLINE, &arg); + ret = notifier_to_errno(ret); + if (ret) + goto failed_removal; + pfn = start_pfn; expire = jiffies + timeout; drain = 0; @@ -539,20 +577,24 @@ repeat: /* reset pagetype flags */ start_isolate_page_range(start_pfn, end_pfn); /* removal success */ - zone = page_zone(pfn_to_page(start_pfn)); zone->present_pages -= offlined_pages; zone->zone_pgdat->node_present_pages -= offlined_pages; totalram_pages -= offlined_pages; num_physpages -= offlined_pages; + vm_total_pages = nr_free_pagecache_pages(); writeback_set_ratelimit(); + + memory_notify(MEM_OFFLINE, &arg); return 0; failed_removal: printk(KERN_INFO "memory offlining %lx to %lx failed\n", start_pfn, end_pfn); + memory_notify(MEM_CANCEL_OFFLINE, &arg); /* pushback to free area */ undo_isolate_page_range(start_pfn, end_pfn); + return ret; } #else diff --git a/mm/shmem.c b/mm/shmem.c index 289dbb0a6fd6..404e53bb2127 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2020,33 +2020,25 @@ static int shmem_match(struct inode *ino, void *vfh) return ino->i_ino == inum && fh[0] == ino->i_generation; } -static struct dentry *shmem_get_dentry(struct super_block *sb, void *vfh) +static struct dentry *shmem_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) { - struct dentry *de = NULL; struct inode *inode; - __u32 *fh = vfh; - __u64 inum = fh[2]; - inum = (inum << 32) | fh[1]; + struct dentry *dentry = NULL; + u64 inum = fid->raw[2]; + inum = (inum << 32) | fid->raw[1]; + + if (fh_len < 3) + return NULL; - inode = ilookup5(sb, (unsigned long)(inum+fh[0]), shmem_match, vfh); + inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), + shmem_match, fid->raw); if (inode) { - de = d_find_alias(inode); + dentry = d_find_alias(inode); iput(inode); } - return de? de: ERR_PTR(-ESTALE); -} - -static struct dentry *shmem_decode_fh(struct super_block *sb, __u32 *fh, - int len, int type, - int (*acceptable)(void *context, struct dentry *de), - void *context) -{ - if (len < 3) - return ERR_PTR(-ESTALE); - - return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, - context); + return dentry; } static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, @@ -2079,11 +2071,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, return 1; } -static struct export_operations shmem_export_ops = { +static const struct export_operations shmem_export_ops = { .get_parent = shmem_get_parent, - .get_dentry = shmem_get_dentry, .encode_fh = shmem_encode_fh, - .decode_fh = shmem_decode_fh, + .fh_to_dentry = shmem_fh_to_dentry, }; static int shmem_parse_options(char *options, int *mode, uid_t *uid, diff --git a/mm/slub.c b/mm/slub.c index e29a42988c78..aac1dd3c657d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -20,6 +20,7 @@ #include <linux/mempolicy.h> #include <linux/ctype.h> #include <linux/kallsyms.h> +#include <linux/memory.h> /* * Lock order: @@ -2694,6 +2695,121 @@ int kmem_cache_shrink(struct kmem_cache *s) } EXPORT_SYMBOL(kmem_cache_shrink); +#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) +static int slab_mem_going_offline_callback(void *arg) +{ + struct kmem_cache *s; + + down_read(&slub_lock); + list_for_each_entry(s, &slab_caches, list) + kmem_cache_shrink(s); + up_read(&slub_lock); + + return 0; +} + +static void slab_mem_offline_callback(void *arg) +{ + struct kmem_cache_node *n; + struct kmem_cache *s; + struct memory_notify *marg = arg; + int offline_node; + + offline_node = marg->status_change_nid; + + /* + * If the node still has available memory. we need kmem_cache_node + * for it yet. + */ + if (offline_node < 0) + return; + + down_read(&slub_lock); + list_for_each_entry(s, &slab_caches, list) { + n = get_node(s, offline_node); + if (n) { + /* + * if n->nr_slabs > 0, slabs still exist on the node + * that is going down. We were unable to free them, + * and offline_pages() function shoudn't call this + * callback. So, we must fail. + */ + BUG_ON(atomic_read(&n->nr_slabs)); + + s->node[offline_node] = NULL; + kmem_cache_free(kmalloc_caches, n); + } + } + up_read(&slub_lock); +} + +static int slab_mem_going_online_callback(void *arg) +{ + struct kmem_cache_node *n; + struct kmem_cache *s; + struct memory_notify *marg = arg; + int nid = marg->status_change_nid; + int ret = 0; + + /* + * If the node's memory is already available, then kmem_cache_node is + * already created. Nothing to do. + */ + if (nid < 0) + return 0; + + /* + * We are bringing a node online. No memory is availabe yet. We must + * allocate a kmem_cache_node structure in order to bring the node + * online. + */ + down_read(&slub_lock); + list_for_each_entry(s, &slab_caches, list) { + /* + * XXX: kmem_cache_alloc_node will fallback to other nodes + * since memory is not yet available from the node that + * is brought up. + */ + n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL); + if (!n) { + ret = -ENOMEM; + goto out; + } + init_kmem_cache_node(n); + s->node[nid] = n; + } +out: + up_read(&slub_lock); + return ret; +} + +static int slab_memory_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + int ret = 0; + + switch (action) { + case MEM_GOING_ONLINE: + ret = slab_mem_going_online_callback(arg); + break; + case MEM_GOING_OFFLINE: + ret = slab_mem_going_offline_callback(arg); + break; + case MEM_OFFLINE: + case MEM_CANCEL_ONLINE: + slab_mem_offline_callback(arg); + break; + case MEM_ONLINE: + case MEM_CANCEL_OFFLINE: + break; + } + + ret = notifier_from_errno(ret); + return ret; +} + +#endif /* CONFIG_MEMORY_HOTPLUG */ + /******************************************************************** * Basic setup of slabs *******************************************************************/ @@ -2715,6 +2831,8 @@ void __init kmem_cache_init(void) sizeof(struct kmem_cache_node), GFP_KERNEL); kmalloc_caches[0].refcount = -1; caches++; + + hotplug_memory_notifier(slab_memory_callback, 1); #endif /* Able to allocate the per node structures */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 70d9b5da96ae..4e2c84fcf276 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2045,7 +2045,7 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) if (copy > 0) { if (copy > len) copy = len; - sg[elt].page = virt_to_page(skb->data + offset); + sg_set_page(&sg[elt], virt_to_page(skb->data + offset)); sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; sg[elt].length = copy; elt++; @@ -2065,7 +2065,7 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) if (copy > len) copy = len; - sg[elt].page = frag->page; + sg_set_page(&sg[elt], frag->page); sg[elt].offset = frag->page_offset+offset-start; sg[elt].length = copy; elt++; diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index 72e6ab66834f..c796661a021b 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -390,9 +390,7 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) icv[3] = crc >> 24; crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16); - sg.page = virt_to_page(pos); - sg.offset = offset_in_page(pos); - sg.length = len + 4; + sg_init_one(&sg, pos, len + 4); return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); } @@ -485,9 +483,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) plen = skb->len - hdr_len - 12; crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16); - sg.page = virt_to_page(pos); - sg.offset = offset_in_page(pos); - sg.length = plen + 4; + sg_init_one(&sg, pos, plen + 4); if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) { if (net_ratelimit()) { printk(KERN_DEBUG ": TKIP: failed to decrypt " @@ -539,11 +535,12 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr, printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n"); return -1; } - sg[0].page = virt_to_page(hdr); + sg_init_table(sg, 2); + sg_set_page(&sg[0], virt_to_page(hdr)); sg[0].offset = offset_in_page(hdr); sg[0].length = 16; - sg[1].page = virt_to_page(data); + sg_set_page(&sg[1], virt_to_page(data)); sg[1].offset = offset_in_page(data); sg[1].length = data_len; diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index 8d182459344e..0af6103d715c 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -170,9 +170,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) icv[3] = crc >> 24; crypto_blkcipher_setkey(wep->tx_tfm, key, klen); - sg.page = virt_to_page(pos); - sg.offset = offset_in_page(pos); - sg.length = len + 4; + sg_init_one(&sg, pos, len + 4); return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); } @@ -212,9 +210,7 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) plen = skb->len - hdr_len - 8; crypto_blkcipher_setkey(wep->rx_tfm, key, klen); - sg.page = virt_to_page(pos); - sg.offset = offset_in_page(pos); - sg.length = plen + 4; + sg_init_one(&sg, pos, plen + 4); if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) return -7; diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 6675261e958f..cc806d640f7a 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -138,9 +138,7 @@ void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, *icv = cpu_to_le32(~crc32_le(~0, data, data_len)); crypto_blkcipher_setkey(tfm, rc4key, klen); - sg.page = virt_to_page(data); - sg.offset = offset_in_page(data); - sg.length = data_len + WEP_ICV_LEN; + sg_init_one(&sg, data, data_len + WEP_ICV_LEN); crypto_blkcipher_encrypt(&desc, &sg, &sg, sg.length); } @@ -204,9 +202,7 @@ int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, __le32 crc; crypto_blkcipher_setkey(tfm, rc4key, klen); - sg.page = virt_to_page(data); - sg.offset = offset_in_page(data); - sg.length = data_len + WEP_ICV_LEN; + sg_init_one(&sg, data, data_len + WEP_ICV_LEN); crypto_blkcipher_decrypt(&desc, &sg, &sg, sg.length); crc = cpu_to_le32(~crc32_le(~0, data, data_len)); diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 781810724714..cbd64b216cce 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -726,7 +726,8 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc, /* set up scatter list */ end = skb_tail_pointer(skb); - sg.page = virt_to_page(auth); + sg_init_table(&sg, 1); + sg_set_page(&sg, virt_to_page(auth)); sg.offset = (unsigned long)(auth) % PAGE_SIZE; sg.length = end - (unsigned char *)auth; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index f983a369d4e2..d5a9785a158b 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1513,7 +1513,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, struct hash_desc desc; /* Sign the message. */ - sg.page = virt_to_page(&cookie->c); + sg_init_table(&sg, 1); + sg_set_page(&sg, virt_to_page(&cookie->c)); sg.offset = (unsigned long)(&cookie->c) % PAGE_SIZE; sg.length = bodysize; keylen = SCTP_SECRET_SIZE; @@ -1585,7 +1586,8 @@ struct sctp_association *sctp_unpack_cookie( /* Check the signature. */ keylen = SCTP_SECRET_SIZE; - sg.page = virt_to_page(bear_cookie); + sg_init_table(&sg, 1); + sg_set_page(&sg, virt_to_page(bear_cookie)); sg.offset = (unsigned long)(bear_cookie) % PAGE_SIZE; sg.length = bodysize; key = (char *)ep->secret_key[ep->current_key]; diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index bfb6a29633dd..32be431affcf 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -197,9 +197,9 @@ encryptor(struct scatterlist *sg, void *data) int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT; in_page = desc->pages[i]; } else { - in_page = sg->page; + in_page = sg_page(sg); } - desc->infrags[desc->fragno].page = in_page; + sg_set_page(&desc->infrags[desc->fragno], in_page); desc->fragno++; desc->fraglen += sg->length; desc->pos += sg->length; @@ -215,11 +215,11 @@ encryptor(struct scatterlist *sg, void *data) if (ret) return ret; if (fraglen) { - desc->outfrags[0].page = sg->page; + sg_set_page(&desc->outfrags[0], sg_page(sg)); desc->outfrags[0].offset = sg->offset + sg->length - fraglen; desc->outfrags[0].length = fraglen; desc->infrags[0] = desc->outfrags[0]; - desc->infrags[0].page = in_page; + sg_set_page(&desc->infrags[0], in_page); desc->fragno = 1; desc->fraglen = fraglen; } else { @@ -287,7 +287,7 @@ decryptor(struct scatterlist *sg, void *data) if (ret) return ret; if (fraglen) { - desc->frags[0].page = sg->page; + sg_set_page(&desc->frags[0], sg_page(sg)); desc->frags[0].offset = sg->offset + sg->length - fraglen; desc->frags[0].length = fraglen; desc->fragno = 1; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 6a59180e1667..3d1f7cdf9dd0 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1059,7 +1059,7 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, do { if (thislen > page_len) thislen = page_len; - sg->page = buf->pages[i]; + sg_set_page(sg, buf->pages[i]); sg->offset = page_offset; sg->length = thislen; ret = actor(sg, data); diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 5ced62c19c63..fb2220a719bd 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -552,7 +552,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, if (copy > len) copy = len; - sg.page = virt_to_page(skb->data + offset); + sg_set_page(&sg, virt_to_page(skb->data + offset)); sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; sg.length = copy; @@ -577,7 +577,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, if (copy > len) copy = len; - sg.page = frag->page; + sg_set_page(&sg, frag->page); sg.offset = frag->page_offset + offset-start; sg.length = copy; diff --git a/security/commoncap.c b/security/commoncap.c index 43f902750a1b..bf67871173ef 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -190,7 +190,8 @@ int cap_inode_killpriv(struct dentry *dentry) return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); } -static inline int cap_from_disk(__le32 *caps, struct linux_binprm *bprm, +static inline int cap_from_disk(struct vfs_cap_data *caps, + struct linux_binprm *bprm, int size) { __u32 magic_etc; @@ -198,7 +199,7 @@ static inline int cap_from_disk(__le32 *caps, struct linux_binprm *bprm, if (size != XATTR_CAPS_SZ) return -EINVAL; - magic_etc = le32_to_cpu(caps[0]); + magic_etc = le32_to_cpu(caps->magic_etc); switch ((magic_etc & VFS_CAP_REVISION_MASK)) { case VFS_CAP_REVISION: @@ -206,8 +207,8 @@ static inline int cap_from_disk(__le32 *caps, struct linux_binprm *bprm, bprm->cap_effective = true; else bprm->cap_effective = false; - bprm->cap_permitted = to_cap_t( le32_to_cpu(caps[1]) ); - bprm->cap_inheritable = to_cap_t( le32_to_cpu(caps[2]) ); + bprm->cap_permitted = to_cap_t(le32_to_cpu(caps->permitted)); + bprm->cap_inheritable = to_cap_t(le32_to_cpu(caps->inheritable)); return 0; default: return -EINVAL; @@ -219,7 +220,7 @@ static int get_file_caps(struct linux_binprm *bprm) { struct dentry *dentry; int rc = 0; - __le32 v1caps[XATTR_CAPS_SZ]; + struct vfs_cap_data incaps; struct inode *inode; if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { @@ -232,8 +233,14 @@ static int get_file_caps(struct linux_binprm *bprm) if (!inode->i_op || !inode->i_op->getxattr) goto out; - rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &v1caps, - XATTR_CAPS_SZ); + rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); + if (rc > 0) { + if (rc == XATTR_CAPS_SZ) + rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, + &incaps, XATTR_CAPS_SZ); + else + rc = -EINVAL; + } if (rc == -ENODATA || rc == -EOPNOTSUPP) { /* no data, that's ok */ rc = 0; @@ -242,7 +249,7 @@ static int get_file_caps(struct linux_binprm *bprm) if (rc < 0) goto out; - rc = cap_from_disk(v1caps, bprm, rc); + rc = cap_from_disk(&incaps, bprm, rc); if (rc) printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", __FUNCTION__, rc, bprm->filename); |