diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2015-02-11 15:15:11 +1030 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2015-02-11 16:47:36 +1030 |
commit | d7fbf6e95e2c5e7ef97c463a97499d7a2341fb09 (patch) | |
tree | eb883c2c3178fa297cfa473b9dd572e88dd0534a /tools | |
parent | 6a54f9ab0d65a2095de50160b8ca7ce6469aaac0 (diff) | |
download | lwn-d7fbf6e95e2c5e7ef97c463a97499d7a2341fb09.tar.gz lwn-d7fbf6e95e2c5e7ef97c463a97499d7a2341fb09.zip |
lguest: add PCI config space emulation to example launcher.
This handles ioport 0xCF8 and 0xCFC accesses, which are used to
read/write PCI device config space.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/lguest/lguest.c | 216 |
1 files changed, 211 insertions, 5 deletions
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index e52a3571076a..0f29657fc065 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -42,6 +42,7 @@ #include <pwd.h> #include <grp.h> #include <sys/user.h> +#include <linux/pci_regs.h> #ifndef VIRTIO_F_ANY_LAYOUT #define VIRTIO_F_ANY_LAYOUT 27 @@ -125,6 +126,21 @@ struct device_list { /* The list of Guest devices, based on command line arguments. */ static struct device_list devices; +/* This is the layout (little-endian) of the PCI config space. */ +struct pci_config { + u16 vendor_id, device_id; + u16 command, status; + u8 revid, prog_if, subclass, class; + u8 cacheline_size, lat_timer, header_type, bist; + u32 bar[6]; + u32 cardbus_cis_ptr; + u16 subsystem_vendor_id, subsystem_device_id; + u32 expansion_rom_addr; + u8 capabilities, reserved1[3]; + u32 reserved2; + u8 irq_line, irq_pin, min_grant, max_latency; +}; + /* The device structure describes a single device. */ struct device { /* The linked-list pointer. */ @@ -146,6 +162,15 @@ struct device { /* Is it operational */ bool running; + /* PCI configuration */ + union { + struct pci_config config; + u32 config_words[sizeof(struct pci_config) / sizeof(u32)]; + }; + + /* Device-specific config hangs off the end of this. */ + struct virtio_pci_mmio *mmio; + /* PCI MMIO resources (all in BAR0) */ size_t mmio_size; u32 mmio_addr; @@ -1173,6 +1198,169 @@ static void handle_output(unsigned long addr) strnlen(from_guest_phys(addr), guest_limit - addr)); } +/*L:217 + * We do PCI. This is mainly done to let us test the kernel virtio PCI + * code. + */ + +/* The IO ports used to read the PCI config space. */ +#define PCI_CONFIG_ADDR 0xCF8 +#define PCI_CONFIG_DATA 0xCFC + +/* + * Not really portable, but does help readability: this is what the Guest + * writes to the PCI_CONFIG_ADDR IO port. + */ +union pci_config_addr { + struct { + unsigned mbz: 2; + unsigned offset: 6; + unsigned funcnum: 3; + unsigned devnum: 5; + unsigned busnum: 8; + unsigned reserved: 7; + unsigned enabled : 1; + } bits; + u32 val; +}; + +/* + * We cache what they wrote to the address port, so we know what they're + * talking about when they access the data port. + */ +static union pci_config_addr pci_config_addr; + +static struct device *find_pci_device(unsigned int index) +{ + return devices.pci[index]; +} + +/* PCI can do 1, 2 and 4 byte reads; we handle that here. */ +static void ioread(u16 off, u32 v, u32 mask, u32 *val) +{ + assert(off < 4); + assert(mask == 0xFF || mask == 0xFFFF || mask == 0xFFFFFFFF); + *val = (v >> (off * 8)) & mask; +} + +/* PCI can do 1, 2 and 4 byte writes; we handle that here. */ +static void iowrite(u16 off, u32 v, u32 mask, u32 *dst) +{ + assert(off < 4); + assert(mask == 0xFF || mask == 0xFFFF || mask == 0xFFFFFFFF); + *dst &= ~(mask << (off * 8)); + *dst |= (v & mask) << (off * 8); +} + +/* + * Where PCI_CONFIG_DATA accesses depends on the previous write to + * PCI_CONFIG_ADDR. + */ +static struct device *dev_and_reg(u32 *reg) +{ + if (!pci_config_addr.bits.enabled) + return NULL; + + if (pci_config_addr.bits.funcnum != 0) + return NULL; + + if (pci_config_addr.bits.busnum != 0) + return NULL; + + if (pci_config_addr.bits.offset * 4 >= sizeof(struct pci_config)) + return NULL; + + *reg = pci_config_addr.bits.offset; + return find_pci_device(pci_config_addr.bits.devnum); +} + +/* Is this accessing the PCI config address port?. */ +static bool is_pci_addr_port(u16 port) +{ + return port >= PCI_CONFIG_ADDR && port < PCI_CONFIG_ADDR + 4; +} + +static bool pci_addr_iowrite(u16 port, u32 mask, u32 val) +{ + iowrite(port - PCI_CONFIG_ADDR, val, mask, + &pci_config_addr.val); + verbose("PCI%s: %#x/%x: bus %u dev %u func %u reg %u\n", + pci_config_addr.bits.enabled ? "" : " DISABLED", + val, mask, + pci_config_addr.bits.busnum, + pci_config_addr.bits.devnum, + pci_config_addr.bits.funcnum, + pci_config_addr.bits.offset); + return true; +} + +static void pci_addr_ioread(u16 port, u32 mask, u32 *val) +{ + ioread(port - PCI_CONFIG_ADDR, pci_config_addr.val, mask, val); +} + +/* Is this accessing the PCI config data port?. */ +static bool is_pci_data_port(u16 port) +{ + return port >= PCI_CONFIG_DATA && port < PCI_CONFIG_DATA + 4; +} + +static bool pci_data_iowrite(u16 port, u32 mask, u32 val) +{ + u32 reg, portoff; + struct device *d = dev_and_reg(®); + + /* Complain if they don't belong to a device. */ + if (!d) + return false; + + /* They can do 1 byte writes, etc. */ + portoff = port - PCI_CONFIG_DATA; + + /* + * PCI uses a weird way to determine the BAR size: the OS + * writes all 1's, and sees which ones stick. + */ + if (&d->config_words[reg] == &d->config.bar[0]) { + int i; + + iowrite(portoff, val, mask, &d->config.bar[0]); + for (i = 0; (1 << i) < d->mmio_size; i++) + d->config.bar[0] &= ~(1 << i); + return true; + } else if ((&d->config_words[reg] > &d->config.bar[0] + && &d->config_words[reg] <= &d->config.bar[6]) + || &d->config_words[reg] == &d->config.expansion_rom_addr) { + /* Allow writing to any other BAR, or expansion ROM */ + iowrite(portoff, val, mask, &d->config_words[reg]); + return true; + /* We let them overide latency timer and cacheline size */ + } else if (&d->config_words[reg] == (void *)&d->config.cacheline_size) { + /* Only let them change the first two fields. */ + if (mask == 0xFFFFFFFF) + mask = 0xFFFF; + iowrite(portoff, val, mask, &d->config_words[reg]); + return true; + } else if (&d->config_words[reg] == (void *)&d->config.command + && mask == 0xFFFF) { + /* Ignore command writes. */ + return true; + } + + /* Complain about other writes. */ + return false; +} + +static void pci_data_ioread(u16 port, u32 mask, u32 *val) +{ + u32 reg; + struct device *d = dev_and_reg(®); + + if (!d) + return; + ioread(port - PCI_CONFIG_DATA, d->config_words[reg], mask, val); +} + /*L:216 * This is where we emulate a handful of Guest instructions. It's ugly * and we used to do it in the kernel but it grew over time. @@ -1284,7 +1472,7 @@ static void emulate_insn(const u8 insn[]) unsigned int insnlen = 0, in = 0, small_operand = 0, byte_access; unsigned int eax, port, mask; /* - * We always return all-ones on IO port reads, which traditionally + * Default is to return all-ones on IO port reads, which traditionally * means "there's nothing there". */ u32 val = 0xFFFFFFFF; @@ -1359,10 +1547,6 @@ static void emulate_insn(const u8 insn[]) else mask = 0xFFFFFFFF; - /* This is the PS/2 keyboard status; 1 means ready for output */ - if (port == 0x64) - val = 1; - /* * If it was an "IN" instruction, they expect the result to be read * into %eax, so we change %eax. @@ -1370,12 +1554,30 @@ static void emulate_insn(const u8 insn[]) eax = getreg(eax); if (in) { + /* This is the PS/2 keyboard status; 1 means ready for output */ + if (port == 0x64) + val = 1; + else if (is_pci_addr_port(port)) + pci_addr_ioread(port, mask, &val); + else if (is_pci_data_port(port)) + pci_data_ioread(port, mask, &val); + /* Clear the bits we're about to read */ eax &= ~mask; /* Copy bits in from val. */ eax |= val & mask; /* Now update the register. */ setreg(eax, eax); + } else { + if (is_pci_addr_port(port)) { + if (!pci_addr_iowrite(port, mask, eax)) + goto bad_io; + } else if (is_pci_data_port(port)) { + if (!pci_data_iowrite(port, mask, eax)) + goto bad_io; + } + /* There are many other ports, eg. CMOS clock, serial + * and parallel ports, so we ignore them all. */ } verbose("IO %s of %x to %u: %#08x\n", @@ -1385,6 +1587,10 @@ skip_insn: setreg(eip, getreg(eip) + insnlen); return; +bad_io: + warnx("Attempt to %s port %u (%#x mask)", + in ? "read from" : "write to", port, mask); + no_emulate: /* Inject trap into Guest. */ if (write(lguest_fd, args, sizeof(args)) < 0) |