diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-04 10:29:23 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-04 10:29:23 -0700 |
commit | 65b97fb7303050fc826e518cf67fc283da23314f (patch) | |
tree | 595e7f04d65d95a39d65bd2dcf2385b3b6ea7969 /arch/powerpc/platforms/pseries/nvram.c | |
parent | ddcf6600b133697adbafd96e080818bdc0dfd028 (diff) | |
parent | 1d8b368ab4aacfc3f864655baad4d31a3028ec1a (diff) | |
download | lwn-65b97fb7303050fc826e518cf67fc283da23314f.tar.gz lwn-65b97fb7303050fc826e518cf67fc283da23314f.zip |
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc updates from Ben Herrenschmidt:
"This is the powerpc changes for the 3.11 merge window. In addition to
the usual bug fixes and small updates, the main highlights are:
- Support for transparent huge pages by Aneesh Kumar for 64-bit
server processors. This allows the use of 16M pages as transparent
huge pages on kernels compiled with a 64K base page size.
- Base VFIO support for KVM on power by Alexey Kardashevskiy
- Wiring up of our nvram to the pstore infrastructure, including
putting compressed oopses in there by Aruna Balakrishnaiah
- Move, rework and improve our "EEH" (basically PCI error handling
and recovery) infrastructure. It is no longer specific to pseries
but is now usable by the new "powernv" platform as well (no
hypervisor) by Gavin Shan.
- I fixed some bugs in our math-emu instruction decoding and made it
usable to emulate some optional FP instructions on processors with
hard FP that lack them (such as fsqrt on Freescale embedded
processors).
- Support for Power8 "Event Based Branch" facility by Michael
Ellerman. This facility allows what is basically "userspace
interrupts" for performance monitor events.
- A bunch of Transactional Memory vs. Signals bug fixes and HW
breakpoint/watchpoint fixes by Michael Neuling.
And more ... I appologize in advance if I've failed to highlight
something that somebody deemed worth it."
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (156 commits)
pstore: Add hsize argument in write_buf call of pstore_ftrace_call
powerpc/fsl: add MPIC timer wakeup support
powerpc/mpic: create mpic subsystem object
powerpc/mpic: add global timer support
powerpc/mpic: add irq_set_wake support
powerpc/85xx: enable coreint for all the 64bit boards
powerpc/8xx: Erroneous double irq_eoi() on CPM IRQ in MPC8xx
powerpc/fsl: Enable CONFIG_E1000E in mpc85xx_smp_defconfig
powerpc/mpic: Add get_version API both for internal and external use
powerpc: Handle both new style and old style reserve maps
powerpc/hw_brk: Fix off by one error when validating DAWR region end
powerpc/pseries: Support compression of oops text via pstore
powerpc/pseries: Re-organise the oops compression code
pstore: Pass header size in the pstore write callback
powerpc/powernv: Fix iommu initialization again
powerpc/pseries: Inform the hypervisor we are using EBB regs
powerpc/perf: Add power8 EBB support
powerpc/perf: Core EBB support for 64-bit book3s
powerpc/perf: Drop MMCRA from thread_struct
powerpc/perf: Don't enable if we have zero events
...
Diffstat (limited to 'arch/powerpc/platforms/pseries/nvram.c')
-rw-r--r-- | arch/powerpc/platforms/pseries/nvram.c | 554 |
1 files changed, 453 insertions, 101 deletions
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 8733a86ad52e..9f8671a44551 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -18,6 +18,7 @@ #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/kmsg_dump.h> +#include <linux/pstore.h> #include <linux/ctype.h> #include <linux/zlib.h> #include <asm/uaccess.h> @@ -29,6 +30,13 @@ /* Max bytes to read/write in one go */ #define NVRW_CNT 0x20 +/* + * Set oops header version to distingush between old and new format header. + * lnx,oops-log partition max size is 4000, header version > 4000 will + * help in identifying new header. + */ +#define OOPS_HDR_VERSION 5000 + static unsigned int nvram_size; static int nvram_fetch, nvram_store; static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ @@ -45,20 +53,23 @@ struct nvram_os_partition { int min_size; /* minimum acceptable size (0 means req_size) */ long size; /* size of data portion (excluding err_log_info) */ long index; /* offset of data portion of partition */ + bool os_partition; /* partition initialized by OS, not FW */ }; static struct nvram_os_partition rtas_log_partition = { .name = "ibm,rtas-log", .req_size = 2079, .min_size = 1055, - .index = -1 + .index = -1, + .os_partition = true }; static struct nvram_os_partition oops_log_partition = { .name = "lnx,oops-log", .req_size = 4000, .min_size = 2000, - .index = -1 + .index = -1, + .os_partition = true }; static const char *pseries_nvram_os_partitions[] = { @@ -67,6 +78,12 @@ static const char *pseries_nvram_os_partitions[] = { NULL }; +struct oops_log_info { + u16 version; + u16 report_length; + u64 timestamp; +} __attribute__((packed)); + static void oops_to_nvram(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); @@ -83,28 +100,28 @@ static unsigned long last_unread_rtas_event; /* timestamp */ * big_oops_buf[] holds the uncompressed text we're capturing. * - * oops_buf[] holds the compressed text, preceded by a prefix. - * The prefix is just a u16 holding the length of the compressed* text. - * (*Or uncompressed, if compression fails.) oops_buf[] gets written - * to NVRAM. + * oops_buf[] holds the compressed text, preceded by a oops header. + * oops header has u16 holding the version of oops header (to differentiate + * between old and new format header) followed by u16 holding the length of + * the compressed* text (*Or uncompressed, if compression fails.) and u64 + * holding the timestamp. oops_buf[] gets written to NVRAM. * - * oops_len points to the prefix. oops_data points to the compressed text. + * oops_log_info points to the header. oops_data points to the compressed text. * * +- oops_buf - * | +- oops_data - * v v - * +------------+-----------------------------------------------+ - * | length | text | - * | (2 bytes) | (oops_data_sz bytes) | - * +------------+-----------------------------------------------+ + * | +- oops_data + * v v + * +-----------+-----------+-----------+------------------------+ + * | version | length | timestamp | text | + * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) | + * +-----------+-----------+-----------+------------------------+ * ^ - * +- oops_len + * +- oops_log_info * * We preallocate these buffers during init to avoid kmalloc during oops/panic. */ static size_t big_oops_buf_sz; static char *big_oops_buf, *oops_buf; -static u16 *oops_len; static char *oops_data; static size_t oops_data_sz; @@ -114,6 +131,30 @@ static size_t oops_data_sz; #define MEM_LEVEL 4 static struct z_stream_s stream; +#ifdef CONFIG_PSTORE +static struct nvram_os_partition of_config_partition = { + .name = "of-config", + .index = -1, + .os_partition = false +}; + +static struct nvram_os_partition common_partition = { + .name = "common", + .index = -1, + .os_partition = false +}; + +static enum pstore_type_id nvram_type_ids[] = { + PSTORE_TYPE_DMESG, + PSTORE_TYPE_PPC_RTAS, + PSTORE_TYPE_PPC_OF, + PSTORE_TYPE_PPC_COMMON, + -1 +}; +static int read_type; +static unsigned long last_rtas_event; +#endif + static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) { unsigned int i; @@ -275,48 +316,72 @@ int nvram_write_error_log(char * buff, int length, { int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, err_type, error_log_cnt); - if (!rc) + if (!rc) { last_unread_rtas_event = get_seconds(); +#ifdef CONFIG_PSTORE + last_rtas_event = get_seconds(); +#endif + } + return rc; } -/* nvram_read_error_log +/* nvram_read_partition * - * Reads nvram for error log for at most 'length' + * Reads nvram partition for at most 'length' */ -int nvram_read_error_log(char * buff, int length, - unsigned int * err_type, unsigned int * error_log_cnt) +int nvram_read_partition(struct nvram_os_partition *part, char *buff, + int length, unsigned int *err_type, + unsigned int *error_log_cnt) { int rc; loff_t tmp_index; struct err_log_info info; - if (rtas_log_partition.index == -1) + if (part->index == -1) return -1; - if (length > rtas_log_partition.size) - length = rtas_log_partition.size; + if (length > part->size) + length = part->size; - tmp_index = rtas_log_partition.index; + tmp_index = part->index; - rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); - return rc; + if (part->os_partition) { + rc = ppc_md.nvram_read((char *)&info, + sizeof(struct err_log_info), + &tmp_index); + if (rc <= 0) { + pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, + rc); + return rc; + } } rc = ppc_md.nvram_read(buff, length, &tmp_index); if (rc <= 0) { - printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); + pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, rc); return rc; } - *error_log_cnt = info.seq_num; - *err_type = info.error_type; + if (part->os_partition) { + *error_log_cnt = info.seq_num; + *err_type = info.error_type; + } return 0; } +/* nvram_read_error_log + * + * Reads nvram for error log for at most 'length' + */ +int nvram_read_error_log(char *buff, int length, + unsigned int *err_type, unsigned int *error_log_cnt) +{ + return nvram_read_partition(&rtas_log_partition, buff, length, + err_type, error_log_cnt); +} + /* This doesn't actually zero anything, but it sets the event_logged * word to tell that this event is safely in syslog. */ @@ -405,6 +470,349 @@ static int __init pseries_nvram_init_os_partition(struct nvram_os_partition return 0; } +/* + * Are we using the ibm,rtas-log for oops/panic reports? And if so, + * would logging this oops/panic overwrite an RTAS event that rtas_errd + * hasn't had a chance to read and process? Return 1 if so, else 0. + * + * We assume that if rtas_errd hasn't read the RTAS event in + * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. + */ +static int clobbering_unread_rtas_event(void) +{ + return (oops_log_partition.index == rtas_log_partition.index + && last_unread_rtas_event + && get_seconds() - last_unread_rtas_event <= + NVRAM_RTAS_READ_TIMEOUT); +} + +/* Derived from logfs_compress() */ +static int nvram_compress(const void *in, void *out, size_t inlen, + size_t outlen) +{ + int err, ret; + + ret = -EIO; + err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, + MEM_LEVEL, Z_DEFAULT_STRATEGY); + if (err != Z_OK) + goto error; + + stream.next_in = in; + stream.avail_in = inlen; + stream.total_in = 0; + stream.next_out = out; + stream.avail_out = outlen; + stream.total_out = 0; + + err = zlib_deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) + goto error; + + err = zlib_deflateEnd(&stream); + if (err != Z_OK) + goto error; + + if (stream.total_out >= stream.total_in) + goto error; + + ret = stream.total_out; +error: + return ret; +} + +/* Compress the text from big_oops_buf into oops_buf. */ +static int zip_oops(size_t text_len) +{ + struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; + int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, + oops_data_sz); + if (zipped_len < 0) { + pr_err("nvram: compression failed; returned %d\n", zipped_len); + pr_err("nvram: logging uncompressed oops/panic report\n"); + return -1; + } + oops_hdr->version = OOPS_HDR_VERSION; + oops_hdr->report_length = (u16) zipped_len; + oops_hdr->timestamp = get_seconds(); + return 0; +} + +#ifdef CONFIG_PSTORE +/* Derived from logfs_uncompress */ +int nvram_decompress(void *in, void *out, size_t inlen, size_t outlen) +{ + int err, ret; + + ret = -EIO; + err = zlib_inflateInit(&stream); + if (err != Z_OK) + goto error; + + stream.next_in = in; + stream.avail_in = inlen; + stream.total_in = 0; + stream.next_out = out; + stream.avail_out = outlen; + stream.total_out = 0; + + err = zlib_inflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) + goto error; + + err = zlib_inflateEnd(&stream); + if (err != Z_OK) + goto error; + + ret = stream.total_out; +error: + return ret; +} + +static int unzip_oops(char *oops_buf, char *big_buf) +{ + struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; + u64 timestamp = oops_hdr->timestamp; + char *big_oops_data = NULL; + char *oops_data_buf = NULL; + size_t big_oops_data_sz; + int unzipped_len; + + big_oops_data = big_buf + sizeof(struct oops_log_info); + big_oops_data_sz = big_oops_buf_sz - sizeof(struct oops_log_info); + oops_data_buf = oops_buf + sizeof(struct oops_log_info); + + unzipped_len = nvram_decompress(oops_data_buf, big_oops_data, + oops_hdr->report_length, + big_oops_data_sz); + + if (unzipped_len < 0) { + pr_err("nvram: decompression failed; returned %d\n", + unzipped_len); + return -1; + } + oops_hdr = (struct oops_log_info *)big_buf; + oops_hdr->version = OOPS_HDR_VERSION; + oops_hdr->report_length = (u16) unzipped_len; + oops_hdr->timestamp = timestamp; + return 0; +} + +static int nvram_pstore_open(struct pstore_info *psi) +{ + /* Reset the iterator to start reading partitions again */ + read_type = -1; + return 0; +} + +/** + * nvram_pstore_write - pstore write callback for nvram + * @type: Type of message logged + * @reason: reason behind dump (oops/panic) + * @id: identifier to indicate the write performed + * @part: pstore writes data to registered buffer in parts, + * part number will indicate the same. + * @count: Indicates oops count + * @hsize: Size of header added by pstore + * @size: number of bytes written to the registered buffer + * @psi: registered pstore_info structure + * + * Called by pstore_dump() when an oops or panic report is logged in the + * printk buffer. + * Returns 0 on successful write. + */ +static int nvram_pstore_write(enum pstore_type_id type, + enum kmsg_dump_reason reason, + u64 *id, unsigned int part, int count, + size_t hsize, size_t size, + struct pstore_info *psi) +{ + int rc; + unsigned int err_type = ERR_TYPE_KERNEL_PANIC; + struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf; + + /* part 1 has the recent messages from printk buffer */ + if (part > 1 || type != PSTORE_TYPE_DMESG || + clobbering_unread_rtas_event()) + return -1; + + oops_hdr->version = OOPS_HDR_VERSION; + oops_hdr->report_length = (u16) size; + oops_hdr->timestamp = get_seconds(); + + if (big_oops_buf) { + rc = zip_oops(size); + /* + * If compression fails copy recent log messages from + * big_oops_buf to oops_data. + */ + if (rc != 0) { + size_t diff = size - oops_data_sz + hsize; + + if (size > oops_data_sz) { + memcpy(oops_data, big_oops_buf, hsize); + memcpy(oops_data + hsize, big_oops_buf + diff, + oops_data_sz - hsize); + + oops_hdr->report_length = (u16) oops_data_sz; + } else + memcpy(oops_data, big_oops_buf, size); + } else + err_type = ERR_TYPE_KERNEL_PANIC_GZ; + } + + rc = nvram_write_os_partition(&oops_log_partition, oops_buf, + (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type, + count); + + if (rc != 0) + return rc; + + *id = part; + return 0; +} + +/* + * Reads the oops/panic report, rtas, of-config and common partition. + * Returns the length of the data we read from each partition. + * Returns 0 if we've been called before. + */ +static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, + int *count, struct timespec *time, char **buf, + struct pstore_info *psi) +{ + struct oops_log_info *oops_hdr; + unsigned int err_type, id_no, size = 0; + struct nvram_os_partition *part = NULL; + char *buff = NULL, *big_buff = NULL; + int rc, sig = 0; + loff_t p; + +read_partition: + read_type++; + + switch (nvram_type_ids[read_type]) { + case PSTORE_TYPE_DMESG: + part = &oops_log_partition; + *type = PSTORE_TYPE_DMESG; + break; + case PSTORE_TYPE_PPC_RTAS: + part = &rtas_log_partition; + *type = PSTORE_TYPE_PPC_RTAS; + time->tv_sec = last_rtas_event; + time->tv_nsec = 0; + break; + case PSTORE_TYPE_PPC_OF: + sig = NVRAM_SIG_OF; + part = &of_config_partition; + *type = PSTORE_TYPE_PPC_OF; + *id = PSTORE_TYPE_PPC_OF; + time->tv_sec = 0; + time->tv_nsec = 0; + break; + case PSTORE_TYPE_PPC_COMMON: + sig = NVRAM_SIG_SYS; + part = &common_partition; + *type = PSTORE_TYPE_PPC_COMMON; + *id = PSTORE_TYPE_PPC_COMMON; + time->tv_sec = 0; + time->tv_nsec = 0; + break; + default: + return 0; + } + + if (!part->os_partition) { + p = nvram_find_partition(part->name, sig, &size); + if (p <= 0) { + pr_err("nvram: Failed to find partition %s, " + "err %d\n", part->name, (int)p); + return 0; + } + part->index = p; + part->size = size; + } + + buff = kmalloc(part->size, GFP_KERNEL); + + if (!buff) + return -ENOMEM; + + if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) { + kfree(buff); + return 0; + } + + *count = 0; + + if (part->os_partition) + *id = id_no; + + if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) { + oops_hdr = (struct oops_log_info *)buff; + *buf = buff + sizeof(*oops_hdr); + + if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) { + big_buff = kmalloc(big_oops_buf_sz, GFP_KERNEL); + if (!big_buff) + return -ENOMEM; + + rc = unzip_oops(buff, big_buff); + + if (rc != 0) { + kfree(buff); + kfree(big_buff); + goto read_partition; + } + + oops_hdr = (struct oops_log_info *)big_buff; + *buf = big_buff + sizeof(*oops_hdr); + kfree(buff); + } + + time->tv_sec = oops_hdr->timestamp; + time->tv_nsec = 0; + return oops_hdr->report_length; + } + + *buf = buff; + return part->size; +} + +static struct pstore_info nvram_pstore_info = { + .owner = THIS_MODULE, + .name = "nvram", + .open = nvram_pstore_open, + .read = nvram_pstore_read, + .write = nvram_pstore_write, +}; + +static int nvram_pstore_init(void) +{ + int rc = 0; + + if (big_oops_buf) { + nvram_pstore_info.buf = big_oops_buf; + nvram_pstore_info.bufsize = big_oops_buf_sz; + } else { + nvram_pstore_info.buf = oops_data; + nvram_pstore_info.bufsize = oops_data_sz; + } + + rc = pstore_register(&nvram_pstore_info); + if (rc != 0) + pr_err("nvram: pstore_register() failed, defaults to " + "kmsg_dump; returned %d\n", rc); + + return rc; +} +#else +static int nvram_pstore_init(void) +{ + return -1; +} +#endif + static void __init nvram_init_oops_partition(int rtas_partition_exists) { int rc; @@ -425,9 +833,8 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists) oops_log_partition.name); return; } - oops_len = (u16*) oops_buf; - oops_data = oops_buf + sizeof(u16); - oops_data_sz = oops_log_partition.size - sizeof(u16); + oops_data = oops_buf + sizeof(struct oops_log_info); + oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info); /* * Figure compression (preceded by elimination of each line's <n> @@ -452,6 +859,11 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists) stream.workspace = NULL; } + rc = nvram_pstore_init(); + + if (!rc) + return; + rc = kmsg_dump_register(&nvram_kmsg_dumper); if (rc != 0) { pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); @@ -501,70 +913,6 @@ int __init pSeries_nvram_init(void) return 0; } -/* - * Are we using the ibm,rtas-log for oops/panic reports? And if so, - * would logging this oops/panic overwrite an RTAS event that rtas_errd - * hasn't had a chance to read and process? Return 1 if so, else 0. - * - * We assume that if rtas_errd hasn't read the RTAS event in - * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. - */ -static int clobbering_unread_rtas_event(void) -{ - return (oops_log_partition.index == rtas_log_partition.index - && last_unread_rtas_event - && get_seconds() - last_unread_rtas_event <= - NVRAM_RTAS_READ_TIMEOUT); -} - -/* Derived from logfs_compress() */ -static int nvram_compress(const void *in, void *out, size_t inlen, - size_t outlen) -{ - int err, ret; - - ret = -EIO; - err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, - MEM_LEVEL, Z_DEFAULT_STRATEGY); - if (err != Z_OK) - goto error; - - stream.next_in = in; - stream.avail_in = inlen; - stream.total_in = 0; - stream.next_out = out; - stream.avail_out = outlen; - stream.total_out = 0; - - err = zlib_deflate(&stream, Z_FINISH); - if (err != Z_STREAM_END) - goto error; - - err = zlib_deflateEnd(&stream); - if (err != Z_OK) - goto error; - - if (stream.total_out >= stream.total_in) - goto error; - - ret = stream.total_out; -error: - return ret; -} - -/* Compress the text from big_oops_buf into oops_buf. */ -static int zip_oops(size_t text_len) -{ - int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, - oops_data_sz); - if (zipped_len < 0) { - pr_err("nvram: compression failed; returned %d\n", zipped_len); - pr_err("nvram: logging uncompressed oops/panic report\n"); - return -1; - } - *oops_len = (u16) zipped_len; - return 0; -} /* * This is our kmsg_dump callback, called after an oops or panic report @@ -576,6 +924,7 @@ static int zip_oops(size_t text_len) static void oops_to_nvram(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { + struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; static unsigned int oops_count = 0; static bool panicking = false; static DEFINE_SPINLOCK(lock); @@ -619,14 +968,17 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, } if (rc != 0) { kmsg_dump_rewind(dumper); - kmsg_dump_get_buffer(dumper, true, + kmsg_dump_get_buffer(dumper, false, oops_data, oops_data_sz, &text_len); err_type = ERR_TYPE_KERNEL_PANIC; - *oops_len = (u16) text_len; + oops_hdr->version = OOPS_HDR_VERSION; + oops_hdr->report_length = (u16) text_len; + oops_hdr->timestamp = get_seconds(); } (void) nvram_write_os_partition(&oops_log_partition, oops_buf, - (int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count); + (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type, + ++oops_count); spin_unlock_irqrestore(&lock, flags); } |