* [1/4] [PATCH]Diskdump - yet another crash dump function
2004-05-27 9:33 [PATCH]Diskdump - yet another crash dump function Takao Indoh
@ 2004-05-27 12:36 ` Takao Indoh
2004-05-27 12:37 ` [2/4] " Takao Indoh
` (4 subsequent siblings)
5 siblings, 0 replies; 38+ messages in thread
From: Takao Indoh @ 2004-05-27 12:36 UTC (permalink / raw)
To: linux-kernel
Hi,
I forgot to attach patches.
This is a patch for the diskdump common layer.
Best Regards,
Takao Indoh
diff -Nur linux-2.6.6.org/arch/i386/kernel/nmi.c linux-2.6.6/arch/i386/kernel/nmi.c
--- linux-2.6.6.org/arch/i386/kernel/nmi.c 2004-05-20 08:58:15.000000000 +0900
+++ linux-2.6.6/arch/i386/kernel/nmi.c 2004-05-27 09:52:45.000000000 +0900
@@ -517,3 +517,4 @@
EXPORT_SYMBOL(release_lapic_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);
+EXPORT_SYMBOL(touch_nmi_watchdog);
diff -Nur linux-2.6.6.org/arch/i386/kernel/reboot.c linux-2.6.6/arch/i386/kernel/reboot.c
--- linux-2.6.6.org/arch/i386/kernel/reboot.c 2004-05-20 08:58:15.000000000 +0900
+++ linux-2.6.6/arch/i386/kernel/reboot.c 2004-05-27 09:52:45.000000000 +0900
@@ -252,7 +252,8 @@
* Stop all CPUs and turn off local APICs and the IO-APIC, so
* other OSs see a clean IRQ state.
*/
- smp_send_stop();
+ if (!crashdump_mode())
+ smp_send_stop();
#elif defined(CONFIG_X86_LOCAL_APIC)
if (cpu_has_apic) {
local_irq_disable();
diff -Nur linux-2.6.6.org/arch/i386/kernel/traps.c linux-2.6.6/arch/i386/kernel/traps.c
--- linux-2.6.6.org/arch/i386/kernel/traps.c 2004-05-20 08:58:15.000000000 +0900
+++ linux-2.6.6/arch/i386/kernel/traps.c 2004-05-27 09:52:45.000000000 +0900
@@ -258,7 +258,8 @@
int nl = 0;
console_verbose();
- spin_lock_irq(&die_lock);
+ if (!crashdump_mode())
+ spin_lock_irq(&die_lock);
bust_spinlocks(1);
handle_BUG(regs);
printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
@@ -277,6 +278,7 @@
if (nl)
printk("\n");
show_registers(regs);
+ try_crashdump(regs);
bust_spinlocks(0);
spin_unlock_irq(&die_lock);
if (in_interrupt())
diff -Nur linux-2.6.6.org/arch/i386/mm/init.c linux-2.6.6/arch/i386/mm/init.c
--- linux-2.6.6.org/arch/i386/mm/init.c 2004-05-20 08:58:15.000000000 +0900
+++ linux-2.6.6/arch/i386/mm/init.c 2004-05-27 09:52:45.000000000 +0900
@@ -168,7 +168,7 @@
extern int is_available_memory(efi_memory_desc_t *);
-static inline int page_is_ram(unsigned long pagenr)
+int page_is_ram(unsigned long pagenr)
{
int i;
unsigned long addr, end;
@@ -205,6 +205,7 @@
}
return 0;
}
+EXPORT_SYMBOL(page_is_ram);
#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
diff -Nur linux-2.6.6.org/drivers/block/Kconfig linux-2.6.6/drivers/block/Kconfig
--- linux-2.6.6.org/drivers/block/Kconfig 2004-05-20 08:58:52.000000000 +0900
+++ linux-2.6.6/drivers/block/Kconfig 2004-05-27 09:52:45.000000000 +0900
@@ -346,6 +346,11 @@
your machine, or if you want to have a raid or loopback device
bigger than 2TB. Otherwise say N.
+config DISKDUMP
+ tristate "Disk dump support"
+ ---help---
+ Disk dump support.
+
source "drivers/s390/block/Kconfig"
endmenu
diff -Nur linux-2.6.6.org/drivers/block/Makefile linux-2.6.6/drivers/block/Makefile
--- linux-2.6.6.org/drivers/block/Makefile 2004-05-20 08:58:52.000000000 +0900
+++ linux-2.6.6/drivers/block/Makefile 2004-05-27 09:52:45.000000000 +0900
@@ -43,3 +43,4 @@
obj-$(CONFIG_VIODASD) += viodasd.o
obj-$(CONFIG_BLK_DEV_CARMEL) += carmel.o
+obj-$(CONFIG_DISKDUMP) += diskdump.o diskdumplib.o
diff -Nur linux-2.6.6.org/drivers/block/diskdump.c linux-2.6.6/drivers/block/diskdump.c
--- linux-2.6.6.org/drivers/block/diskdump.c 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.6/drivers/block/diskdump.c 2004-05-27 09:57:52.000000000 +0900
@@ -0,0 +1,971 @@
+/*
+ * linux/drivers/block/diskdump.c
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Copyright (C) 2002 Red Hat, Inc.
+ * Written by Nobuhiro Tachino (ntachino@jp.fujitsu.com)
+ *
+ * Some codes were derived from netdump and copyright belongs to
+ * Red Hat, Inc.
+ */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/file.h>
+#include <linux/reboot.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/highmem.h>
+#include <linux/utsname.h>
+#include <linux/console.h>
+#include <linux/smp_lock.h>
+#include <linux/nmi.h>
+#include <linux/genhd.h>
+#include <linux/crc32.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/diskdump.h>
+#include <linux/diskdumplib.h>
+#include <asm/diskdump.h>
+
+#define DEBUG 0
+#if DEBUG
+# define Dbg(x, ...) printk(KERN_INFO "disk_dump:" x "\n", ## __VA_ARGS__)
+#else
+# define Dbg(x...)
+#endif
+
+/*
+ * Logging helpers.  Each one appends "\n" to the format string.
+ * None of them ends in a semicolon, so "Err(...);" expands to exactly one
+ * statement and is safe as the body of an unbraced if/else.
+ */
+#define Err(x, ...) printk(KERN_ERR "disk_dump: " x "\n", ## __VA_ARGS__)
+#define Warn(x, ...) printk(KERN_WARNING "disk_dump: " x "\n", ## __VA_ARGS__)
+#define Info(x, ...) printk(x "\n", ## __VA_ARGS__)
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#define ROUNDUP(x, y) (((x) + ((y)-1))/(y))
+
+/* 512byte sectors to blocks */
+#define SECTOR_BLOCK(s) ((s) >> (DUMP_BLOCK_SHIFT - 9))
+
+static unsigned int fallback_on_err = 1;
+static unsigned int allow_risky_dumps = 1;
+static unsigned int block_order = 2;
+/*
+ * Signed on purpose: check_dump_partition() treats a negative sample_rate
+ * as "skip the partition check", which an unsigned variable can never express.
+ */
+static int sample_rate = 8;
+MODULE_PARM(fallback_on_err, "i");
+MODULE_PARM(allow_risky_dumps, "i");
+MODULE_PARM(block_order, "i");
+MODULE_PARM(sample_rate, "i");
+
+static unsigned long timestamp_1sec;
+static uint32_t module_crc;
+static char *scratch;
+static struct disk_dump_header dump_header;
+
+/* Registered dump devices */
+static LIST_HEAD(disk_dump_devices);
+
+/* Registered dump types, e.g. SCSI, ... */
+static LIST_HEAD(disk_dump_types);
+
+static spinlock_t disk_dump_lock = SPIN_LOCK_UNLOCKED;
+
+
+/* Save the addresses of task_struct of each frozen CPU */
+static struct task_struct *tasks[NR_CPUS];
+
+static unsigned int header_blocks; /* The size of all headers */
+static unsigned int bitmap_blocks; /* The size of bitmap header */
+static unsigned int total_ram_blocks; /* The size of memory */
+static unsigned int total_blocks; /* The sum of above */
+
+struct notifier_block *disk_dump_notifier_list;
+
+extern int panic_timeout;
+
+#if CONFIG_SMP
+/*
+ * Park the calling CPU: record its current task in tasks[] and then spin
+ * forever with local interrupts disabled.  disk_dump() runs this on the
+ * other CPUs through smp_call_function().  Never returns.
+ */
+static void freeze_cpu(void *dummy)
+{
+ unsigned int cpu = smp_processor_id();
+
+ tasks[cpu] = current;
+
+ for (;;) local_irq_disable();
+}
+#endif
+
+static int lapse = 0; /* 200msec unit */
+
+/*
+ * Estimate the remaining time, in the same 200msec units as lapse.
+ * nr is the amount of work done so far and maxnr the total; the ratio
+ * maxnr/nr is kept with 8 fractional bits.  nr must not be zero.
+ */
+static inline unsigned long eta(unsigned long nr, unsigned long maxnr)
+{
+	unsigned long ratio = (maxnr << 8) / nr;
+	unsigned long long projected = ratio * (unsigned long long)lapse;
+
+	/* projected total time minus the time already spent */
+	return (unsigned long)(projected >> 8) - lapse;
+}
+
+/*
+ * Print a one-line progress indicator (nr of maxnr, ETA, spinner).
+ * Output is rate limited: nothing is printed unless more than
+ * timestamp_1sec/5 timestamp ticks passed since the previous line.
+ * Each printed line also advances lapse by one.
+ */
+static inline void print_status(unsigned int nr, unsigned int maxnr)
+{
+	static char *spinner = "/|\\-";
+	static unsigned long long prev_timestamp = 0;
+	unsigned long long now;
+
+	platform_timestamp(now);
+
+	if (now - prev_timestamp <= (timestamp_1sec/5))
+		return;
+
+	prev_timestamp = now;
+	lapse++;
+	printk("%u/%u %lu ETA %c \r",
+		nr, maxnr, eta(nr, maxnr) / 5, spinner[lapse & 3]);
+}
+
+/*
+ * Overwrite the progress line and reset the lapse counter used by
+ * print_status()/eta().  Both arguments are unused.
+ */
+static inline void clear_status(int nr, int maxnr)
+{
+ printk(" \r");
+ lapse = 0;
+}
+
+/*
+ * Check the signature on a block. The format is as follows.
+ *
+ * 1st word = 'disk'
+ * 2nd word = 'dump'
+ * 3rd word = block number
+ * 4th word = ((block number + 7) * 11) & 0xffffffff
+ * 5th word = ((4th word + 7)* 11) & 0xffffffff
+ * ..
+ *
+ * buf points at one PAGE_SIZE block, block_nr is the block number the
+ * block is expected to carry.
+ *
+ * Return TRUE if the signature is correct, else return FALSE
+ */
+static int check_block_signature(void *buf, unsigned int block_nr)
+{
+	int word_nr = PAGE_SIZE / sizeof(int);
+	int *words = buf;
+	unsigned int val;
+	int i;
+
+	/*
+	 * The magic occupies words 0 and 1 and the sequence check below
+	 * starts at word 2, so both magic words have to be compared here.
+	 * NOTE(review): assumes DUMP_PARTITION_SIGNATURE is at least two
+	 * words ("disk" "dump") long, as the format above describes.
+	 */
+	if (memcmp(buf, DUMP_PARTITION_SIGNATURE, 2 * sizeof(*words)))
+		return FALSE;
+
+	val = block_nr;
+	for (i = 2; i < word_nr; i++) {
+		if ((unsigned int)words[i] != val)
+			return FALSE;
+		val = (val + 7) * 11;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Read len blocks, starting at block nr, from the dump partition into buf.
+ * Touches the NMI watchdog before issuing the I/O.
+ * Returns 0 on success or the negative value returned by the driver.
+ */
+static int read_blocks(struct disk_dump_partition *dump_part, unsigned int nr, char *buf, int len)
+{
+	int err;
+
+	touch_nmi_watchdog();
+	err = dump_part->device->ops.rw_block(dump_part, READ, nr, buf, len);
+	if (err >= 0)
+		return 0;
+
+	Err("read error on block %u", nr);
+	return err;
+}
+
+/*
+ * Write len blocks from buf to the dump partition, starting at block offs.
+ * Touches the NMI watchdog before issuing the I/O.
+ * Returns 0 on success or the negative value returned by the driver.
+ */
+static int write_blocks(struct disk_dump_partition *dump_part, unsigned int offs, char *buf, int len)
+{
+	int err;
+
+	touch_nmi_watchdog();
+	err = dump_part->device->ops.rw_block(dump_part, WRITE, offs, buf, len);
+	if (err >= 0)
+		return 0;
+
+	Err("write error on block %u", offs);
+	return err;
+}
+
+/*
+ * Initialize the common header
+ */
+
+/*
+ * Write the common header
+ *
+ * Copies dump_header into a zero-filled scratch page and writes that page
+ * as block 1 of the dump partition.  Returns the result of write_blocks():
+ * 0 on success, negative on I/O error.
+ */
+static int write_header(struct disk_dump_partition *dump_part)
+{
+ memset(scratch, '\0', PAGE_SIZE);
+ memcpy(scratch, &dump_header, sizeof(dump_header));
+
+ return write_blocks(dump_part, 1, scratch, 1);
+}
+
+/*
+ * Read len blocks starting at blk into scratch and verify the signature of
+ * each of them.  Return TRUE if all of them are good, else return FALSE.
+ */
+static int check_partition_chunk(struct disk_dump_partition *dump_part, unsigned int blk, unsigned int len, unsigned int partition_size)
+{
+	unsigned int i;
+
+	if (read_blocks(dump_part, blk, scratch, len) < 0)
+		return FALSE;
+	print_status(blk + 1, partition_size);
+	for (i = 0; i < len; i++) {
+		if (!check_block_signature(scratch + i * DUMP_BLOCK_SIZE, blk + i)) {
+			Err("bad signature in block %u", blk + i);
+			return FALSE;
+		}
+	}
+	return TRUE;
+}
+
+/*
+ * Check the signatures of sampled blocks of the dump partition.
+ *
+ * One chunk of blocks is checked every (chunk size << sample_rate) blocks,
+ * and the last chunk of the partition is always checked.  A negative
+ * sample_rate disables the check.
+ *
+ * Return TRUE if the signature is correct, else return FALSE
+ */
+static int check_dump_partition(struct disk_dump_partition *dump_part, unsigned int partition_size)
+{
+	unsigned int blk;
+	unsigned int chunk_blks, skips;
+	unsigned int len;
+	unsigned int checked_end = 0;	/* first block after the last checked chunk */
+
+	/* the cast keeps the test meaningful even if sample_rate is unsigned */
+	if ((int)sample_rate < 0)		/* No check */
+		return TRUE;
+
+	/*
+	 * If the device has limitations of transfer size, use it.
+	 */
+	chunk_blks = 1 << block_order;
+	if (dump_part->device->max_blocks)
+		chunk_blks = min(chunk_blks, dump_part->device->max_blocks);
+	skips = chunk_blks << sample_rate;
+	/* a shift that overflowed must not give a zero (endless) stride */
+	if (skips < chunk_blks)
+		skips = chunk_blks;
+
+	lapse = 0;
+	for (blk = 0; blk < partition_size; blk += skips) {
+		len = min(chunk_blks, partition_size - blk);
+		if (!check_partition_chunk(dump_part, blk, len, partition_size))
+			return FALSE;
+		checked_end = blk + len;
+	}
+
+	/* Check the end of the dump partition */
+	if (checked_end < partition_size) {
+		/* here partition_size > chunk_blks, so this cannot underflow */
+		blk = partition_size - chunk_blks;
+		if (!check_partition_chunk(dump_part, blk, chunk_blks, partition_size))
+			return FALSE;
+	}
+
+	clear_status(blk, partition_size);
+	return TRUE;
+}
+
+/*
+ * Write the memory bitmap to blocks bitmap_offset .. bitmap_offset +
+ * bitmap_blocks - 1 of the dump partition.  Bit n of the bitmap is set
+ * when page_is_ram() reports page frame n as RAM.
+ * Returns 0 on success or the negative error of the failed write.
+ */
+#define IDX2PAGENR(nr, byte, bit) (((nr) * PAGE_SIZE + (byte)) * 8 + (bit))
+static int write_bitmap(struct disk_dump_partition *dump_part, unsigned int bitmap_offset, unsigned int bitmap_blocks)
+{
+	unsigned int blk;
+	int err = 0;
+
+	for (blk = 0; blk < bitmap_blocks; blk++) {
+		int idx, shift;
+
+		/* build one block worth of bitmap in the scratch buffer */
+		for (idx = 0; idx < PAGE_SIZE; idx++) {
+			unsigned char bits = 0;
+
+			for (shift = 0; shift < 8; shift++) {
+				if (page_is_ram(IDX2PAGENR(blk, idx, shift)))
+					bits |= (1 << shift);
+			}
+			scratch[idx] = (char)bits;
+		}
+
+		err = write_blocks(dump_part, bitmap_offset + blk, scratch, 1);
+		if (err < 0) {
+			Err("I/O error %d on block %u", err, bitmap_offset + blk);
+			return err;
+		}
+	}
+	return err;
+}
+
+/*
+ * Write whole memory to dump partition.
+ *
+ * Pages that page_is_ram() accepts are copied into the scratch buffer and
+ * written in chunks of up to (1 << block_order) blocks, starting at block
+ * offset.  At most max_blocks_written pages are saved.
+ *
+ * Returns 0 on success or a negative error from write_blocks().  The number
+ * of pages handed to the device is stored in *blocks_written.
+ */
+static int write_memory(struct disk_dump_partition *dump_part, int offset, unsigned int max_blocks_written, unsigned int *blocks_written)
+{
+	char *kaddr;
+	unsigned int blocks = 0;
+	struct page *page;
+	unsigned int nr;
+	int ret = 0;
+	int blk_in_chunk = 0;
+
+	for (nr = 0; nr < max_mapnr; nr++) {
+		if (!page_is_ram(nr))
+			continue;
+
+		if (blocks >= max_blocks_written) {
+			Warn("dump device is too small. %u pages were not saved", total_ram_blocks - blocks);
+			/*
+			 * Leave the loop but still fall through to the flush
+			 * below: pages already copied into scratch are counted
+			 * in blocks and must reach the disk.
+			 */
+			break;
+		}
+		page = mem_map + nr;
+		kaddr = (char *)kmap_atomic(page, KM_DISKDUMP);
+		/*
+		 * need to copy because adapter drivers use virt_to_bus()
+		 */
+		memcpy(scratch + blk_in_chunk * PAGE_SIZE, kaddr, PAGE_SIZE);
+		blk_in_chunk++;
+		blocks++;
+		kunmap_atomic(kaddr, KM_DISKDUMP);
+
+		if (blk_in_chunk >= (1 << block_order)) {
+			ret = write_blocks(dump_part, offset, scratch, blk_in_chunk);
+			if (ret < 0) {
+				Err("I/O error %d on block %u", ret, offset);
+				break;
+			}
+			offset += blk_in_chunk;
+			blk_in_chunk = 0;
+			print_status(blocks, max_blocks_written);
+		}
+	}
+
+	/* flush the last, partially filled chunk */
+	if (ret >= 0 && blk_in_chunk > 0) {
+		ret = write_blocks(dump_part, offset, scratch, blk_in_chunk);
+		if (ret < 0)
+			Err("I/O error %d on block %u", ret, offset);
+	}
+
+	clear_status(nr, max_blocks_written);
+
+	*blocks_written = blocks;
+	return ret;
+}
+
+/*
+ * Pick the dump partition to use.
+ *
+ * Each device's sanity_check() (when present) reports 0 for OK, a negative
+ * value for unusable and a positive value for "may work".  The first pass
+ * only accepts devices reporting 0 (or having no sanity_check).  If that
+ * finds nothing and allow_risky_dumps is set, a second pass also accepts
+ * "may work" devices.  The first partition of the first acceptable device
+ * is returned, or NULL when there is none.
+ *
+ * XXX We cannot handle multiple partitions yet.
+ */
+static struct disk_dump_partition *select_dump_partition(void)
+{
+	struct disk_dump_device *candidate;
+	int strict;
+
+	for (strict = 1; strict >= 0; strict--) {
+		list_for_each_entry(candidate, &disk_dump_devices, list) {
+			int sanity = 0;
+
+			if (candidate->ops.sanity_check)
+				sanity = candidate->ops.sanity_check(candidate);
+			if (sanity < 0)
+				continue;
+			if (sanity > 0 && strict)
+				continue;
+			if (!list_empty(&candidate->partitions))
+				return list_entry(candidate->partitions.next,
+						  struct disk_dump_partition, list);
+		}
+		/* no relaxed second pass unless risky dumps are allowed */
+		if (!allow_risky_dumps)
+			break;
+	}
+	return NULL;
+}
+
+/*
+ * Crash dump entry point, registered through diskdump_register_hook().
+ *
+ * Disables local interrupts, freezes the other CPUs, selects a dump
+ * partition, verifies it, and writes the headers, the memory bitmap and
+ * the memory pages to it.
+ *
+ * If the dump failed and fallback_on_err is set this function returns to
+ * its caller.  Otherwise it calls the panic notifiers and reboots (when
+ * panic_timeout > 0) or halts, and does not return.
+ */
+static void disk_dump(struct pt_regs *regs)
+{
+	unsigned long flags;
+	int ret = -EIO;
+	struct pt_regs myregs;
+	unsigned int max_written_blocks, written_blocks;
+	unsigned int check_blocks;
+	int i;
+	struct disk_dump_device *dump_device = NULL;
+	struct disk_dump_partition *dump_part = NULL;
+
+	/* Inhibit interrupt and stop other CPUs */
+	local_save_flags(flags);
+	local_irq_disable();
+
+	diskdump_lib_init();
+
+	/*
+	 * Check the checksum of myself
+	 */
+	/* the trylock result is not examined: the dump proceeds either way */
+	spin_trylock(&disk_dump_lock);
+	if (!check_crc_module()) {
+		Err("checksum error. diskdump common module may be compromised.");
+		goto done;
+	}
+
+	diskdump_mode = 1;
+
+	Dbg("notify dump start.");
+	notifier_call_chain(&disk_dump_notifier_list, 0, NULL);
+
+	tasks[smp_processor_id()] = current;
+#if CONFIG_SMP
+	smp_call_function(freeze_cpu, NULL, 1, 0);
+	mdelay(3000);
+	printk("CPU frozen: ");
+	for (i = 0; i < NR_CPUS; i++) {
+		if (tasks[i] != NULL)
+			printk("#%d", i);
+
+	}
+	printk("\n");
+	printk("CPU#%d is executing diskdump.\n", smp_processor_id());
+#else
+	mdelay(1000);
+#endif
+
+	platform_fix_regs();
+
+	if (list_empty(&disk_dump_devices)) {
+		Err("adapter driver is not registered.");
+		goto done;
+	}
+
+	Info("start dumping");
+
+	if (!(dump_part = select_dump_partition())) {
+		Err("No sane dump device found");
+		goto done;
+	}
+	dump_device = dump_part->device;
+
+	/*
+	 * Stop ongoing I/O with polling driver and make the shift to I/O mode
+	 * for dump
+	 */
+	Dbg("do quiesce");
+	if (dump_device->ops.quiesce)
+		if ((ret = dump_device->ops.quiesce(dump_device)) < 0) {
+			Err("quiesce failed. error %d", ret);
+			goto done;
+		}
+
+	/*
+	 * From here on ret may hold quiesce's non-negative result, so every
+	 * failure path must set a negative ret explicitly; otherwise the
+	 * code at "done" would treat the aborted dump as a success.
+	 */
+	if (SECTOR_BLOCK(dump_part->nr_sects) < header_blocks + bitmap_blocks) {
+		Warn("dump partition is too small. Aborted");
+		ret = -ENOSPC;
+		goto done;
+	}
+
+	/*
+	 * Check dump partition.  Never check beyond the end of the partition:
+	 * a partition smaller than total_blocks is still usable for a
+	 * partial dump (see max_written_blocks below).
+	 */
+	check_blocks = total_blocks;
+	if (SECTOR_BLOCK(dump_part->nr_sects) < check_blocks)
+		check_blocks = SECTOR_BLOCK(dump_part->nr_sects);
+	Info("check dump partition...");
+	if (!check_dump_partition(dump_part, check_blocks)) {
+		Err("check partition failed.");
+		ret = -EIO;
+		goto done;
+	}
+
+	/*
+	 * Write the common header
+	 */
+	memcpy(dump_header.signature, DISK_DUMP_SIGNATURE, sizeof(dump_header.signature));
+	dump_header.utsname = system_utsname;
+	dump_header.timestamp = xtime;
+	dump_header.status = DUMP_HEADER_INCOMPLETED;
+	dump_header.block_size = PAGE_SIZE;
+	dump_header.sub_hdr_size = size_of_sub_header();
+	dump_header.bitmap_blocks = bitmap_blocks;
+	dump_header.max_mapnr = max_mapnr;
+	dump_header.total_ram_blocks = total_ram_blocks;
+	dump_header.device_blocks = SECTOR_BLOCK(dump_part->nr_sects);
+	dump_header.written_blocks = 0;
+
+	Dbg("write header");
+	if ((ret = write_header(dump_part)) < 0) {
+		Err("writing header failed. error %d", ret);
+		goto done;
+	}
+
+	/*
+	 * Write the architecture dependent header
+	 */
+	Dbg("write sub header");
+	if ((ret = write_sub_header()) < 0) {
+		Err("writing header failed. error %d", ret);
+		goto done;
+	}
+
+	Dbg("writing memory bitmaps..");
+	if ((ret = write_bitmap(dump_part, header_blocks, bitmap_blocks)) < 0)
+		goto done;
+
+	max_written_blocks = total_ram_blocks;
+	if (dump_header.device_blocks < total_blocks) {
+		Warn("dump partition is too small. actual blocks %u. expected blocks %u. whole memory will not be saved",
+				dump_header.device_blocks, total_blocks);
+		max_written_blocks -= (total_blocks - dump_header.device_blocks);
+	}
+
+	Info("dumping memory..");
+	if ((ret = write_memory(dump_part, header_blocks + bitmap_blocks,
+				max_written_blocks, &written_blocks)) < 0)
+		goto done;
+
+	/*
+	 * Set the number of block that is written into and write it
+	 * into partition again.
+	 *
+	 * NOTE(review): dump_header.status is still DUMP_HEADER_INCOMPLETED
+	 * here; if the header format has a "completed" value it probably
+	 * should be stored before this final write - confirm against
+	 * linux/diskdump.h.
+	 */
+	dump_header.written_blocks = written_blocks;
+	if ((ret = write_header(dump_part)) < 0) {
+		Err("writing header failed. error %d", ret);
+		goto done;
+	}
+
+	ret = 0;
+
+done:
+	Dbg("do adapter shutdown.");
+	if (dump_device && dump_device->ops.shutdown)
+		if (dump_device->ops.shutdown(dump_device))
+			Err("adapter shutdown failed.");
+
+	/*
+	 * If diskdump failed and fallback_on_err is set,
+	 * We just return and leave panic to netdump.
+	 */
+	if (fallback_on_err && ret != 0)
+		return;
+
+	Dbg("notify panic.");
+	notifier_call_chain(&panic_notifier_list, 0, NULL);
+
+	diskdump_lib_exit();
+
+	if (panic_timeout > 0) {
+		printk(KERN_EMERG "Rebooting in %d second%s..",
+			panic_timeout, "s" + (panic_timeout == 1));
+		for (i = 0; i < panic_timeout; i++) {
+			touch_nmi_watchdog();
+			mdelay(1000);
+		}
+		printk("\n");
+		machine_restart(NULL);
+	}
+	printk(KERN_EMERG "halt\n");
+	for (;;) {
+		touch_nmi_watchdog();
+		machine_halt();
+		mdelay(1000);
+	}
+}
+
+/*
+ * Look up the registered dump partition whose inode refers to block
+ * device dev.  Returns NULL when no registered partition matches.
+ */
+static struct disk_dump_partition *find_dump_partition(dev_t dev)
+{
+	struct disk_dump_device *dev_iter;
+	struct disk_dump_partition *part_iter;
+
+	list_for_each_entry(dev_iter, &disk_dump_devices, list) {
+		list_for_each_entry(part_iter, &dev_iter->partitions, list) {
+			dev_t part_dev = part_iter->dentry->d_inode->i_rdev;
+
+			if (part_dev == dev)
+				return part_iter;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Look up the registered dump device that wraps real_device.
+ * Returns NULL when real_device has not been registered.
+ */
+static struct disk_dump_device *find_dump_device(void *real_device)
+{
+	struct disk_dump_device *iter;
+
+	list_for_each_entry(iter, &disk_dump_devices, list) {
+		if (iter->device == real_device)
+			return iter;
+	}
+	return NULL;
+}
+
+/*
+ * Ask every registered dump type to probe dev.  The first type whose
+ * probe() returns a non-NULL device wins: it is stored in *_dump_type and
+ * the probed device is returned.  Returns NULL (leaving *_dump_type
+ * untouched) when no type recognizes dev.
+ */
+static void *find_real_device(dev_t dev, struct disk_dump_type **_dump_type)
+{
+	struct disk_dump_type *type_iter;
+
+	list_for_each_entry(type_iter, &disk_dump_types, list) {
+		void *probed = type_iter->probe(dev);
+
+		if (probed == NULL)
+			continue;
+		*_dump_type = type_iter;
+		return probed;
+	}
+	return NULL;
+}
+
+/*
+ * Add dump partition structure corresponding to file to the dump device
+ * structure.
+ *
+ * Returns 0 on success, -EINVAL when file does not refer to a partition of
+ * a known disk, or -ENOMEM.  On success the new entry holds a reference on
+ * the file's vfsmount and dentry (dropped again in remove_dump()).
+ */
+static int add_dump_partition(struct disk_dump_device *dump_device, struct file *file)
+{
+	struct disk_dump_partition *dump_part;
+	struct inode *inode = file->f_dentry->d_inode;
+	dev_t dev = inode->i_rdev;
+	struct gendisk *gd;
+	int part;
+	char buffer[32];
+
+	/*
+	 * Validate the device before allocating or taking any reference so
+	 * that the error path has nothing to undo.  part is 1-based here;
+	 * anything below 1 (e.g. the whole disk) would index gd->part[-1].
+	 *
+	 * NOTE(review): get_gendisk() may return a referenced gendisk that
+	 * is never released here - confirm and add the matching put.
+	 */
+	gd = get_gendisk(dev, &part);
+	if (!gd || part < 1 || !gd->part || !gd->part[part - 1])
+		return -EINVAL;
+
+	if (!(dump_part = kmalloc(sizeof(*dump_part), GFP_KERNEL)))
+		return -ENOMEM;
+
+	dump_part->device = dump_device;
+	dump_part->vfsmount = mntget(file->f_vfsmnt);
+	dump_part->dentry = dget(file->f_dentry);
+	dump_part->nr_sects = gd->part[part - 1]->nr_sects;
+	dump_part->start_sect = gd->part[part - 1]->start_sect;
+
+	if (SECTOR_BLOCK(dump_part->nr_sects) < total_blocks) {
+		format_dev_t(buffer, dev);
+		/* Warn() already appends the newline */
+		Warn("%s is too small to save whole system memory", buffer);
+	}
+
+	list_add(&dump_part->list, &dump_device->partitions);
+
+	return 0;
+}
+
+/*
+ * Add dump partition corresponding to file.
+ * Must be called with disk_dump_lock held.
+ *
+ * Creates and registers the dump device for the underlying real device if
+ * it is not known yet, then attaches the partition to it.
+ *
+ * Returns 0 on success, -EEXIST if the partition is already registered,
+ * -ENXIO if no dump type recognizes the device, -ENOMEM, -EINVAL, or the
+ * error returned by the dump type's add_device().
+ */
+static int add_dump(struct file *file)
+{
+	struct disk_dump_type *dump_type = NULL;
+	struct disk_dump_device *dump_device;
+	void *real_device;
+	dev_t dev = file->f_dentry->d_inode->i_rdev;
+	int created = 0;	/* set when dump_device was allocated here */
+	int ret;
+
+	/* Check whether this inode is already registered */
+	if (find_dump_partition(dev))
+		return -EEXIST;
+
+	/* find dump_type and real device for this inode */
+	if (!(real_device = find_real_device(dev, &dump_type)))
+		return -ENXIO;
+
+	dump_device = find_dump_device(real_device);
+	if (dump_device == NULL) {
+		/* real_device is not registered. create new dump_device */
+		if (!(dump_device = kmalloc(sizeof(*dump_device), GFP_KERNEL)))
+			return -ENOMEM;
+
+		memset(dump_device, 0, sizeof(*dump_device));
+		INIT_LIST_HEAD(&dump_device->partitions);
+
+		dump_device->dump_type = dump_type;
+		dump_device->device = real_device;
+		if ((ret = dump_type->add_device(dump_device)) < 0) {
+			kfree(dump_device);
+			return ret;
+		}
+		if (!try_module_get(dump_type->owner)) {
+			/* undo add_device() and the allocation */
+			dump_type->remove_device(dump_device);
+			kfree(dump_device);
+			return -EINVAL;
+		}
+		list_add(&dump_device->list, &disk_dump_devices);
+		created = 1;
+	}
+
+	ret = add_dump_partition(dump_device, file);
+	/*
+	 * Only tear down a device created by this call; a device that was
+	 * already registered may still own other partitions.
+	 */
+	if (ret < 0 && created) {
+		dump_type->remove_device(dump_device);
+		module_put(dump_type->owner);
+		list_del(&dump_device->list);
+		kfree(dump_device);
+	}
+
+	return ret;
+}
+
+/*
+ * Remove user specified dump partition.
+ * Must be called with disk_dump_lock held.
+ *
+ * Drops the vfsmount/dentry references held by the partition and frees it.
+ * When the owning dump device has no partitions left, the device is
+ * removed from its dump type, unlinked and freed as well.
+ *
+ * Returns 0 on success or -ENOENT if dev is not a registered partition.
+ */
+static int remove_dump(dev_t dev)
+{
+	struct disk_dump_device *dump_device;
+	struct disk_dump_partition *dump_part;
+	struct disk_dump_type *dump_type;
+
+	if (!(dump_part = find_dump_partition(dev)))
+		return -ENOENT;
+
+	dump_device = dump_part->device;
+
+	list_del(&dump_part->list);
+	mntput(dump_part->vfsmount);
+	dput(dump_part->dentry);
+	kfree(dump_part);
+
+	if (list_empty(&dump_device->partitions)) {
+		dump_type = dump_device->dump_type;
+		dump_type->remove_device(dump_device);
+		/*
+		 * add_dump() takes exactly one module reference per dump
+		 * device, so exactly one is dropped here.
+		 */
+		module_put(dump_type->owner);
+		list_del(&dump_device->list);
+		kfree(dump_device);
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * ioctl handler of /proc/diskdump.
+ *
+ * param is a file descriptor referring to a block device.
+ * BLKADDDUMPDEVICE registers it as a dump partition and
+ * BLKREMOVEDUMPDEVICE unregisters it.  Requires CAP_SYS_ADMIN.
+ *
+ * Returns -EPERM, -EBADF (bad descriptor or not a block device), -EINVAL
+ * (unknown cmd) or the result of add_dump()/remove_dump().
+ */
+static int proc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long param)
+{
+	struct file *dump_file;
+	struct inode *dump_inode;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	dump_file = fget((int)param);
+	if (!dump_file)
+		return -EBADF;
+
+	dump_inode = dump_file->f_dentry->d_inode;
+	if (!S_ISBLK(dump_inode->i_mode)) {
+		ret = -EBADF;
+		goto out_fput;
+	}
+
+	spin_lock(&disk_dump_lock);
+	if (cmd == BLKADDDUMPDEVICE)
+		ret = add_dump(dump_file);
+	else if (cmd == BLKREMOVEDUMPDEVICE)
+		ret = remove_dump(dump_inode->i_rdev);
+	else
+		ret = -EINVAL;
+
+	/* the checksum is recomputed whatever the outcome was */
+	set_crc_modules();
+	spin_unlock(&disk_dump_lock);
+
+out_fput:
+	fput(dump_file);
+	return ret;
+}
+
+/*
+ * seq_file ->start: take disk_dump_lock and return the partition at
+ * position *pos, counting across all devices, or NULL if there is none.
+ * seq->private is set to the device currently being walked.  The lock is
+ * released in disk_dump_seq_stop().
+ */
+static void *disk_dump_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct disk_dump_device *dev_iter;
+	struct disk_dump_partition *part_iter;
+	loff_t skip = *pos;
+
+	spin_lock(&disk_dump_lock);
+	list_for_each_entry(dev_iter, &disk_dump_devices, list) {
+		seq->private = dev_iter;
+		list_for_each_entry(part_iter, &dev_iter->partitions, list) {
+			if (skip == 0)
+				return part_iter;
+			skip--;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * seq_file ->next: advance *pos and return the partition following v, or
+ * NULL at the end of the list.
+ *
+ * v is the struct disk_dump_partition returned by ->start or a previous
+ * ->next, and seq->private is the struct disk_dump_device that owns it.
+ * Both are handled as the struct pointers they are (via list_entry())
+ * instead of being reinterpreted as list_head pointers, so this does not
+ * depend on where the list member sits inside those structures.
+ */
+static void *disk_dump_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct disk_dump_partition *dump_part = v;
+	struct disk_dump_device *dump_device = seq->private;
+	struct list_head *next;
+
+	(*pos)++;
+
+	/* more partitions on the current device? */
+	next = dump_part->list.next;
+	if (next != &dump_device->partitions)
+		return list_entry(next, struct disk_dump_partition, list);
+
+	/* move on to the next device that has at least one partition */
+	for (next = dump_device->list.next; next != &disk_dump_devices; next = next->next) {
+		dump_device = list_entry(next, struct disk_dump_device, list);
+		if (list_empty(&dump_device->partitions))
+			continue;
+		seq->private = dump_device;
+		return list_entry(dump_device->partitions.next,
+				  struct disk_dump_partition, list);
+	}
+
+	return NULL;
+}
+
+/* seq_file ->stop: release the lock taken in disk_dump_seq_start(). */
+static void disk_dump_seq_stop(struct seq_file *seq, void *v)
+{
+ spin_unlock(&disk_dump_lock);
+}
+
+/*
+ * seq_file ->show: print one line per dump partition:
+ * "<path> <start sector> <number of sectors>".
+ * Returns 0 on success or -ENOMEM.
+ */
+static int disk_dump_seq_show(struct seq_file *seq, void *v)
+{
+	struct disk_dump_partition *dump_part = v;
+	char *page;
+	char *path;
+
+	/*
+	 * disk_dump_seq_start() took disk_dump_lock (a spinlock) and it is
+	 * only released in disk_dump_seq_stop(), so the allocation must not
+	 * sleep.
+	 */
+	if (!(page = (char *)__get_free_page(GFP_ATOMIC)))
+		return -ENOMEM;
+	path = d_path(dump_part->dentry, dump_part->vfsmount, page, PAGE_SIZE);
+	seq_printf(seq, "%s %lu %lu\n",
+		path, dump_part->start_sect, dump_part->nr_sects);
+	free_page((unsigned long)page);
+	return 0;
+}
+
+/* Iterator callbacks used to list the registered dump partitions. */
+static struct seq_operations disk_dump_seq_ops = {
+ .start = disk_dump_seq_start,
+ .next = disk_dump_seq_next,
+ .stop = disk_dump_seq_stop,
+ .show = disk_dump_seq_show,
+};
+
+/* open() handler: attach the partition iterator to the opened file. */
+static int disk_dump_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &disk_dump_seq_ops);
+}
+
+/*
+ * File operations installed on the "diskdump" proc entry by
+ * init_diskdump(): reads go through seq_file, ioctl through proc_ioctl().
+ */
+static struct file_operations disk_dump_fops = {
+ .owner = THIS_MODULE,
+ .open = disk_dump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ .ioctl = proc_ioctl,
+};
+#endif
+
+
+/*
+ * Add dump_type to the list of known dump types and refresh the module
+ * checksum, both under disk_dump_lock.  Always returns 0.
+ */
+int register_disk_dump_type(struct disk_dump_type *dump_type)
+{
+ spin_lock(&disk_dump_lock);
+ list_add(&dump_type->list, &disk_dump_types);
+ set_crc_modules();
+ spin_unlock(&disk_dump_lock);
+
+ return 0;
+}
+
+/*
+ * Remove dump_type from the list of known dump types and refresh the
+ * module checksum, both under disk_dump_lock.  Always returns 0.
+ */
+int unregister_disk_dump_type(struct disk_dump_type *dump_type)
+{
+ spin_lock(&disk_dump_lock);
+ list_del(&dump_type->list);
+ set_crc_modules();
+ spin_unlock(&disk_dump_lock);
+
+ return 0;
+}
+
+EXPORT_SYMBOL(register_disk_dump_type);
+EXPORT_SYMBOL(unregister_disk_dump_type);
+EXPORT_SYMBOL(disk_dump_notifier_list);
+
+
+/*
+ * Compute header_blocks, bitmap_blocks, total_ram_blocks and total_blocks,
+ * and print them.
+ */
+static void compute_total_blocks(void)
+{
+	unsigned int pfn;
+
+	/*
+	 * Layout of the dump partition:
+	 *
+	 * block 0: dump partition header (never overwritten)
+	 * block 1: dump header
+	 * block 2: dump subheader
+	 * block 3..n: memory bitmap
+	 * block (n + 1)...: saved memory
+	 *
+	 * header_blocks covers blocks 0 and 1 plus the architecture
+	 * dependent sub header.
+	 */
+	header_blocks = 2 + size_of_sub_header();
+
+	/* count the page frames that page_is_ram() reports as RAM */
+	total_ram_blocks = 0;
+	for (pfn = 0; pfn < max_mapnr; pfn++)
+		total_ram_blocks += page_is_ram(pfn) ? 1 : 0;
+
+	/* one bitmap bit per page frame below max_mapnr */
+	bitmap_blocks = ROUNDUP(max_mapnr, 8 * PAGE_SIZE);
+
+	/* headers + bitmap + every RAM page */
+	total_blocks = header_blocks + bitmap_blocks + total_ram_blocks;
+
+	Info("header blocks: %u", header_blocks);
+	Info("bitmap blocks: %u", bitmap_blocks);
+	Info("total number of memory blocks: %u", total_ram_blocks);
+	Info("total blocks written: %u", total_blocks);
+}
+
+/*
+ * Module initialization.
+ *
+ * Allocates the scratch buffer (falling back to smaller orders when the
+ * requested block_order cannot be satisfied), registers disk_dump() as the
+ * dump hook, computes the dump layout, calibrates timestamp_1sec and
+ * creates the "diskdump" proc entry.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int init_diskdump(void)
+{
+	unsigned long long t0;
+	unsigned long long t1;
+	struct page *page = NULL;
+	int order;
+
+	if (!platform_supports_diskdump) {
+		Err("platform does not support diskdump.");
+		return -1;
+	}
+
+	/*
+	 * Allocate the scratch buffer.  A signed local is used for the
+	 * countdown: block_order itself is unsigned, so decrementing it past
+	 * zero would wrap around instead of ending the loop.
+	 */
+	for (order = (int)block_order; order >= 0; order--) {
+		page = alloc_pages(GFP_KERNEL, order);
+		if (page != NULL)
+			break;
+	}
+	if (!page) {
+		Err("alloc_pages failed.");
+		return -1;
+	}
+	block_order = order;
+	scratch = page_address(page);
+	Info("Maximum block size: %lu", PAGE_SIZE << block_order);
+
+	if (diskdump_register_hook(disk_dump)) {
+		Err("failed to register hooks.");
+		/* do not leak the scratch buffer */
+		free_pages((unsigned long)scratch, block_order);
+		scratch = NULL;
+		return -1;
+	}
+
+	compute_total_blocks();
+
+	/* timestamp ticks per second, measured over a 1 msec delay */
+	platform_timestamp(t0);
+	mdelay(1);
+	platform_timestamp(t1);
+	timestamp_1sec = (unsigned long)(t1 - t0) * 1000;
+
+#ifdef CONFIG_PROC_FS
+	{
+		struct proc_dir_entry *p;
+
+		p = create_proc_entry("diskdump", S_IRUGO|S_IWUSR, NULL);
+		if (p)
+			p->proc_fops = &disk_dump_fops;
+	}
+#endif
+
+	return 0;
+}
+
+/*
+ * Module exit: unregister the dump hook, free the scratch buffer and
+ * remove the "diskdump" proc entry.
+ * NOTE(review): the proc entry is removed last, after scratch is freed;
+ * dump devices still on disk_dump_devices are not released here - confirm
+ * that this is intended.
+ */
+static void cleanup_diskdump(void)
+{
+ Info("shut down.");
+ diskdump_unregister_hook();
+ free_pages((unsigned long)scratch, block_order);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("diskdump", NULL);
+#endif
+}
+
+module_init(init_diskdump);
+module_exit(cleanup_diskdump);
+MODULE_LICENSE("GPL");
diff -Nur linux-2.6.6.org/drivers/block/diskdumplib.c linux-2.6.6/drivers/block/diskdumplib.c
--- linux-2.6.6.org/drivers/block/diskdumplib.c 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.6/drivers/block/diskdumplib.c 2004-05-27 09:52:45.000000000 +0900
@@ -0,0 +1,203 @@
+/*
+ * linux/drivers/block/diskdumplib.c
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Written by Nobuhiro Tachino (ntachino@jp.fujitsu.com)
+ *
+ */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/file.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/utsname.h>
+#include <linux/smp_lock.h>
+#include <linux/genhd.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/diskdump.h>
+#include <linux/diskdumplib.h>
+#include <asm/diskdump.h>
+
+/*
+ * timer list and tasklet_struct holder
+ */
+unsigned long volatile diskdump_base_jiffies;
+static unsigned long long timestamp_base;
+static unsigned long timestamp_hz;
+
+#define DISKDUMP_NUM_TASKLETS 8
+
+/*
+ * We can't use next field of tasklet because it breaks the original
+ * tasklets chain and we have no way to know which chain the tasklet is
+ * linked.
+ */
+static struct tasklet_struct *diskdump_tasklets[DISKDUMP_NUM_TASKLETS];
+
+static LIST_HEAD(diskdump_timers);
+static LIST_HEAD(diskdump_workq);
+
+
+/*
+ * Remember tasklet in the first free slot of diskdump_tasklets[].
+ * Returns 0 on success, -1 when all slots are in use.
+ */
+static int store_tasklet(struct tasklet_struct *tasklet)
+{
+	int slot = 0;
+
+	while (slot < DISKDUMP_NUM_TASKLETS && diskdump_tasklets[slot] != NULL)
+		slot++;
+
+	if (slot == DISKDUMP_NUM_TASKLETS)
+		return -1;
+
+	diskdump_tasklets[slot] = tasklet;
+	return 0;
+}
+
+/*
+ * Return tasklet if it is already stored in diskdump_tasklets[],
+ * otherwise NULL.
+ */
+static struct tasklet_struct *find_tasklet(struct tasklet_struct *tasklet)
+{
+	int slot;
+
+	for (slot = 0; slot < DISKDUMP_NUM_TASKLETS; slot++) {
+		if (diskdump_tasklets[slot] == tasklet)
+			return tasklet;
+	}
+	return NULL;
+}
+
+/*
+ * Dump-time tasklet scheduling: make sure tasklet is recorded in
+ * diskdump_tasklets[] and mark it as scheduled, so that diskdump_update()
+ * runs it.  The scheduled bit is set even when no free slot was found.
+ */
+void _diskdump_tasklet_schedule(struct tasklet_struct *tasklet)
+{
+	if (find_tasklet(tasklet) == NULL && store_tasklet(tasklet) != 0)
+		printk(KERN_ERR "diskdumplib: too many tasklet. Ignored\n");
+
+	set_bit(TASKLET_STATE_SCHED, &tasklet->state);
+}
+
+/*
+ * Dump-time work scheduling: append work to diskdump_workq, which is
+ * processed by diskdump_update().  Always returns 1.
+ */
+int _diskdump_schedule_work(struct work_struct *work)
+{
+ list_add_tail(&work->entry, &diskdump_workq);
+ return 1;
+}
+
+/*
+ * Dump-time add_timer: put timer on the private diskdump_timers list.
+ * timer->base is set to a non-NULL marker value; _diskdump_del_timer()
+ * tests base against NULL to decide whether the timer is queued.
+ */
+void _diskdump_add_timer(struct timer_list *timer)
+{
+ timer->base = (void *)1;
+ list_add(&timer->entry, &diskdump_timers);
+}
+
+/*
+ * Dump-time del_timer: take timer off the diskdump_timers list.
+ *
+ * timer->base != NULL marks a queued timer (see _diskdump_add_timer()).
+ * Returns 1 if the timer was queued and has been removed, 0 if it was not
+ * queued.
+ */
+int _diskdump_del_timer(struct timer_list *timer)
+{
+	if (timer->base == NULL)
+		return 0;
+
+	list_del(&timer->entry);
+	/*
+	 * Mark the timer as no longer queued; without this a second delete
+	 * (or a mod_timer) would list_del() an already unlinked entry.
+	 */
+	timer->base = NULL;
+	return 1;
+}
+
+/*
+ * Dump-time mod_timer: remove timer if it is queued, set the new expiry
+ * and queue it again.  Returns what _diskdump_del_timer() returned, i.e.
+ * 1 if the timer had been queued before and 0 otherwise.
+ */
+int _diskdump_mod_timer(struct timer_list *timer, unsigned long expires)
+{
+ int ret;
+
+ ret = _diskdump_del_timer(timer);
+ timer->expires = expires;
+ _diskdump_add_timer(timer);
+
+ return ret;
+}
+
+/*
+ * Advance jiffies from the platform timestamp: for every timestamp_hz
+ * ticks that passed since timestamp_base, move timestamp_base forward by
+ * timestamp_hz and increment jiffies by one.
+ * NOTE(review): loops forever if timestamp_hz is 0 - confirm that
+ * diskdump_lib_init() can never measure a zero interval.
+ */
+static void update_jiffies(void)
+{
+ unsigned long long t;
+
+ platform_timestamp(t);
+ while (t > timestamp_base + timestamp_hz) {
+ timestamp_base += timestamp_hz;
+ jiffies++;
+ platform_timestamp(t);
+ }
+}
+
+/*
+ * Poll routine of the dump-time environment.
+ *
+ * Updates jiffies, then runs every expired timer on diskdump_timers, every
+ * enabled tasklet in diskdump_tasklets[] that is marked scheduled, and
+ * every work item queued on diskdump_workq.
+ */
+void diskdump_update(void)
+{
+	struct tasklet_struct *tasklet;
+	struct work_struct *work;
+	struct timer_list *timer;
+	struct list_head *t, *n, head;
+	int i;
+
+	update_jiffies();
+
+	/* run timers */
+	list_for_each_safe(t, n, &diskdump_timers) {
+		timer = list_entry(t, struct timer_list, entry);
+		if (time_before_eq(timer->expires, jiffies)) {
+			list_del(t);
+			/*
+			 * The timer is no longer queued; clear the marker so
+			 * that a del/mod_timer from the handler does not
+			 * unlink the entry a second time.
+			 */
+			timer->base = NULL;
+			timer->function(timer->data);
+		}
+	}
+
+	/* run tasklet */
+	for (i = 0; i < DISKDUMP_NUM_TASKLETS; i++)
+		if ((tasklet = diskdump_tasklets[i]))
+			if (!atomic_read(&tasklet->count))
+				if (test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
+					tasklet->func(tasklet->data);
+
+	/*
+	 * run work queue: move the pending items onto a private list head
+	 * first, so that work queued by a handler is picked up by the next
+	 * call instead of this one.
+	 */
+	list_add(&head, &diskdump_workq);
+	list_del_init(&diskdump_workq);
+	n = head.next;
+	while (n != &head) {
+		/* the current item is n; t is stale from the timer loop */
+		work = list_entry(n, struct work_struct, entry);
+		n = n->next;
+		/* unlink so the entry never points at the on-stack head */
+		list_del_init(&work->entry);
+		if (work->func)
+			work->func(work->wq_data);
+	}
+}
+
+/*
+ * Set up the dump-time environment: save the current jiffies value in
+ * diskdump_base_jiffies, measure how many platform timestamp ticks pass
+ * during udelay(1000000/HZ) and store that in timestamp_hz, then run
+ * diskdump_update() once.
+ */
+void diskdump_lib_init(void)
+{
+ unsigned long long t;
+
+ /* Save original jiffies value */
+ diskdump_base_jiffies = jiffies;
+
+ platform_timestamp(timestamp_base);
+ udelay(1000000/HZ);
+ platform_timestamp(t);
+ timestamp_hz = (unsigned long)(t - timestamp_base);
+
+ diskdump_update();
+}
+
+/* Undo diskdump_lib_init(): put back the jiffies value saved there. */
+void diskdump_lib_exit(void)
+{
+ /* Restore original jiffies. */
+ jiffies = diskdump_base_jiffies;
+}
+
+EXPORT_SYMBOL(diskdump_lib_init);
+EXPORT_SYMBOL(diskdump_lib_exit);
+EXPORT_SYMBOL(diskdump_update);
+EXPORT_SYMBOL(_diskdump_add_timer);
+EXPORT_SYMBOL(_diskdump_del_timer);
+EXPORT_SYMBOL(_diskdump_mod_timer);
+EXPORT_SYMBOL(_diskdump_tasklet_schedule);
+EXPORT_SYMBOL(_diskdump_schedule_work);
+
+MODULE_LICENSE("GPL");
diff -Nur linux-2.6.6.org/drivers/block/genhd.c linux-2.6.6/drivers/block/genhd.c
--- linux-2.6.6.org/drivers/block/genhd.c 2004-05-20 08:58:52.000000000 +0900
+++ linux-2.6.6/drivers/block/genhd.c 2004-05-27 09:52:45.000000000 +0900
@@ -224,6 +224,8 @@
return kobj ? to_disk(kobj) : NULL;
}
+EXPORT_SYMBOL(get_gendisk);
+
#ifdef CONFIG_PROC_FS
/* iterator */
static void *part_start(struct seq_file *part, loff_t *pos)
@@ -629,3 +631,28 @@
}
EXPORT_SYMBOL(invalidate_partition);
+
+/*
+ * Dump stuff.
+ */
+void (*diskdump_func) (struct pt_regs *regs) = NULL;
+EXPORT_SYMBOL_GPL(diskdump_func);
+
+/*
+ * Install dump_func as the crash dump entry point.
+ * Returns 0 on success, or -EEXIST when a hook is already installed.
+ */
+int diskdump_register_hook(void (*dump_func) (struct pt_regs *))
+{
+	int err = -EEXIST;
+
+	if (!diskdump_func) {
+		diskdump_func = dump_func;
+		err = 0;
+	}
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(diskdump_register_hook);
+
+/* Remove the crash dump entry point by resetting diskdump_func to NULL. */
+void diskdump_unregister_hook(void)
+{
+ diskdump_func = NULL;
+}
+
+EXPORT_SYMBOL_GPL(diskdump_unregister_hook);
diff -Nur linux-2.6.6.org/drivers/char/sysrq.c linux-2.6.6/drivers/char/sysrq.c
--- linux-2.6.6.org/drivers/char/sysrq.c 2004-05-20 08:58:32.000000000 +0900
+++ linux-2.6.6/drivers/char/sysrq.c 2004-05-27 09:52:45.000000000 +0900
@@ -107,6 +107,19 @@
.action_msg = "Resetting",
};
+/*
+ * crash sysrq handler: deliberately stores through a NULL pointer so that
+ * the kernel faults on request. The key, pt_regs and tty arguments are
+ * not used.
+ */
+static void sysrq_handle_crash(int key, struct pt_regs *pt_regs,
+ struct tty_struct *tty)
+{
+ *( (char *) 0) = 0;
+}
+
+static struct sysrq_key_op sysrq_crash_op = {
+ .handler = sysrq_handle_crash,
+ .help_msg = "Crash",
+ .action_msg = "Crashing the kernel by request",
+};
+
static void sysrq_handle_sync(int key, struct pt_regs *pt_regs,
struct tty_struct *tty)
{
@@ -235,7 +248,7 @@
it is handled specially on the sparc
and will never arrive */
/* b */ &sysrq_reboot_op,
-/* c */ NULL,
+/* c */ &sysrq_crash_op,
/* d */ NULL,
/* e */ &sysrq_term_op,
/* f */ NULL,
diff -Nur linux-2.6.6.org/include/asm-i386/diskdump.h linux-2.6.6/include/asm-i386/diskdump.h
--- linux-2.6.6.org/include/asm-i386/diskdump.h 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.6/include/asm-i386/diskdump.h 2004-05-27 09:52:45.000000000 +0900
@@ -0,0 +1,73 @@
+#ifndef _ASM_I386_DISKDUMP_H
+#define _ASM_I386_DISKDUMP_H
+
+/*
+ * linux/include/asm-i386/diskdump.h
+ *
+ * Copyright (c) 2004 FUJITSU LIMITED
+ * Copyright (c) 2003 Red Hat, Inc. All rights reserved.
+ */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifdef __KERNEL__
+
+#include <linux/elf.h>
+
+extern int page_is_ram(unsigned long);
+const static int platform_supports_diskdump = 1;
+
+/*
+ * Build a corrected copy of the trapping registers in `myregs`.
+ * Expects `regs` (pointer to struct pt_regs) and `myregs` to be in scope
+ * where the macro is used. esp and ss default to the address just past
+ * *regs and to __KERNEL_DS; when the low two bits of regs->xcs are
+ * non-zero, the saved regs->esp and the low 16 bits of regs->xss are used
+ * instead.
+ */
+#define platform_fix_regs() \
+{ \
+ unsigned long esp; \
+ unsigned short ss; \
+ esp = (unsigned long) ((char *)regs + sizeof (struct pt_regs)); \
+ ss = __KERNEL_DS; \
+ if (regs->xcs & 3) { \
+ esp = regs->esp; \
+ ss = regs->xss & 0xffff; \
+ } \
+ myregs = *regs; \
+ myregs.esp = esp; \
+ myregs.xss = (myregs.xss & 0xffff0000) | ss; \
+}
+
+struct disk_dump_sub_header {
+ elf_gregset_t elf_regs;
+ struct task_struct *tasks[NR_CPUS];
+};
+
+#define platform_timestamp(x) rdtscll(x)
+
+#define size_of_sub_header() ((sizeof(struct disk_dump_sub_header) + PAGE_SIZE - 1) / DUMP_BLOCK_SIZE)
+
+/*
+ * Fill `scratch` with the architecture-dependent sub header (register set
+ * taken from `myregs` plus the `tasks` array) and pass it to
+ * write_blocks(). Relies on scratch, myregs, tasks and dump_part being in
+ * scope where the macro is used.
+ * Evaluates to 1 when write_blocks() returns >= 0, otherwise to the
+ * write_blocks() return value.
+ * NOTE(review): the arguments 2 and 1 are presumably the block offset and
+ * the block count -- confirm against write_blocks().
+ */
+#define write_sub_header() \
+({ \
+ int ret; \
+ struct disk_dump_sub_header *header; \
+ \
+ header = (struct disk_dump_sub_header *)scratch; \
+ ELF_CORE_COPY_REGS(header->elf_regs, (&myregs)); \
+ memcpy(&header->tasks, tasks, sizeof(tasks)); \
+ if ((ret = write_blocks(dump_part, 2, scratch, 1)) >= 0)\
+ ret = 1; /* size of sub header in page */; \
+ ret; \
+})
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_I386_DISKDUMP_H */
diff -Nur linux-2.6.6.org/include/asm-i386/kmap_types.h linux-2.6.6/include/asm-i386/kmap_types.h
--- linux-2.6.6.org/include/asm-i386/kmap_types.h 2004-05-20 08:59:04.000000000 +0900
+++ linux-2.6.6/include/asm-i386/kmap_types.h 2004-05-27 09:52:45.000000000 +0900
@@ -24,7 +24,8 @@
D(11) KM_IRQ1,
D(12) KM_SOFTIRQ0,
D(13) KM_SOFTIRQ1,
-D(14) KM_TYPE_NR
+D(14) KM_DISKDUMP,
+D(15) KM_TYPE_NR
};
#undef D
diff -Nur linux-2.6.6.org/include/linux/diskdump.h linux-2.6.6/include/linux/diskdump.h
--- linux-2.6.6.org/include/linux/diskdump.h 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.6/include/linux/diskdump.h 2004-05-27 09:52:45.000000000 +0900
@@ -0,0 +1,150 @@
+#ifndef _LINUX_DISKDUMP_H
+#define _LINUX_DISKDUMP_H
+
+/*
+ * linux/include/linux/diskdump.h
+ *
+ * Copyright (c) 2004 FUJITSU LIMITED
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/list.h>
+#include <linux/mount.h>
+#include <linux/dcache.h>
+#include <linux/blkdev.h>
+#include <linux/utsname.h>
+
+/* Dump I/O unit. Must be the same of PAGE_SIZE */
+#define DUMP_BLOCK_SIZE PAGE_SIZE
+#define DUMP_BLOCK_SHIFT PAGE_SHIFT
+
+/* Dump ioctl */
+#define BLKADDDUMPDEVICE 0xdf00 /* Add a dump device */
+#define BLKREMOVEDUMPDEVICE 0xdf01 /* Delete a dump device */
+
+int diskdump_register_hook(void (*dump_func)(struct pt_regs *));
+void diskdump_unregister_hook(void);
+
+/*
+ * The handler that adapter driver provide for the common module of
+ * dump
+ */
+struct disk_dump_partition;
+struct disk_dump_device;
+
+struct disk_dump_type {
+ void *(*probe)(dev_t);
+ int (*add_device)(struct disk_dump_device *);
+ void (*remove_device)(struct disk_dump_device *);
+ struct module *owner;
+ struct list_head list;
+};
+
+struct disk_dump_device_ops {
+ int (*sanity_check)(struct disk_dump_device *);
+ int (*quiesce)(struct disk_dump_device *);
+ int (*shutdown)(struct disk_dump_device *);
+ int (*rw_block)(struct disk_dump_partition *, int rw, unsigned long block_nr, void *buf, int len);
+};
+
+struct disk_dump_device {
+ struct list_head list;
+ struct disk_dump_device_ops ops;
+ struct disk_dump_type *dump_type;
+ void *device;
+ unsigned int max_blocks;
+ struct list_head partitions;
+};
+
+struct disk_dump_partition {
+ struct list_head list;
+ struct disk_dump_device *device;
+ struct vfsmount *vfsmount;
+ struct dentry *dentry;
+ unsigned long start_sect;
+ unsigned long nr_sects;
+};
+
+
+int register_disk_dump_type(struct disk_dump_type *);
+int unregister_disk_dump_type(struct disk_dump_type *);
+
+
+/*
+ * Dump header that doesn't depend on the architecture
+ */
+
+#define DISK_DUMP_SIGNATURE "DISKDUMP"
+
+#define DUMP_PARTITION_SIGNATURE "diskdump"
+
+#define DUMP_HEADER_COMPLETED 0
+#define DUMP_HEADER_INCOMPLETED 1
+
+struct disk_dump_header {
+ char signature[8]; /* = "DISKDUMP" */
+ struct new_utsname utsname; /* copy of system_utsname */
+ struct timespec timestamp; /* Time stamp */
+ unsigned int status; /* Above flags */
+ int block_size; /* Size of a block in byte */
+ int sub_hdr_size; /* Size of arch dependent
+ header in blocks */
+ unsigned int bitmap_blocks; /* Size of Memory bitmap in
+ block */
+ unsigned int max_mapnr; /* = max_mapnr */
+ unsigned int total_ram_blocks;/* Number of blocks should be
+ written */
+ unsigned int device_blocks; /* Number of total blocks in
+ * the dump device */
+ unsigned int written_blocks; /* Number of written blocks */
+};
+
+/*
+ * Calculate the check sum of whole module
+ */
+#define get_crc_module() \
+({ \
+ struct module *module = &__this_module; \
+ crc32_le(0, (char *)(module->module_core), \
+ ((unsigned long)module - (unsigned long)(module->module_core))); \
+})
+
+/*
+ * Calculate the checksum of the whole module and store it in module_crc.
+ * module_crc is zeroed before the calculation, as check_crc_module() also
+ * does; presumably the variable lies inside the checksummed region --
+ * TODO confirm.
+ */
+#define set_crc_modules() \
+({ \
+ module_crc = 0; \
+ module_crc = get_crc_module(); \
+})
+
+/*
+ * Compare the checksum value that is stored in module_crc to the check
+ * sum of current whole module. Must be called with holding disk_dump_lock.
+ * Return TRUE if they are the same, else return FALSE
+ *
+ */
+#define check_crc_module() \
+({ \
+ uint32_t orig_crc, cur_crc; \
+ \
+ orig_crc = module_crc; module_crc = 0; \
+ cur_crc = get_crc_module(); \
+ module_crc = orig_crc; \
+ orig_crc == cur_crc; \
+})
+
+
+#endif /* _LINUX_DISKDUMP_H */
diff -Nur linux-2.6.6.org/include/linux/diskdumplib.h linux-2.6.6/include/linux/diskdumplib.h
--- linux-2.6.6.org/include/linux/diskdumplib.h 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.6/include/linux/diskdumplib.h 2004-05-27 09:52:45.000000000 +0900
@@ -0,0 +1,57 @@
+#ifndef _LINUX_DISKDUMPLIB_H
+#define _LINUX_DISKDUMPLIB_H
+
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+
+void diskdump_lib_init(void);
+void diskdump_lib_exit(void);
+void diskdump_update(void);
+
+void _diskdump_add_timer(struct timer_list *);
+int _diskdump_del_timer(struct timer_list *);
+int _diskdump_mod_timer(struct timer_list *, unsigned long);
+void _diskdump_tasklet_schedule(struct tasklet_struct *);
+int _diskdump_schedule_work(struct work_struct *);
+
+/*
+ * add_timer() front end: calls _diskdump_add_timer() while in crash dump
+ * mode and the regular add_timer() otherwise.
+ */
+static inline void diskdump_add_timer(struct timer_list *timer)
+{
+	if (!crashdump_mode()) {
+		add_timer(timer);
+		return;
+	}
+
+	_diskdump_add_timer(timer);
+}
+
+/*
+ * del_timer() front end: returns the result of _diskdump_del_timer() in
+ * crash dump mode and of del_timer() otherwise.
+ */
+static inline int diskdump_del_timer(struct timer_list *timer)
+{
+	return crashdump_mode() ? _diskdump_del_timer(timer)
+				: del_timer(timer);
+}
+
+/*
+ * mod_timer() front end: returns the result of _diskdump_mod_timer() in
+ * crash dump mode and of mod_timer() otherwise.
+ */
+static inline int diskdump_mod_timer(struct timer_list *timer, unsigned long expires)
+{
+	return crashdump_mode() ? _diskdump_mod_timer(timer, expires)
+				: mod_timer(timer, expires);
+}
+
+/*
+ * tasklet_schedule() front end: calls _diskdump_tasklet_schedule() while
+ * in crash dump mode and the regular tasklet_schedule() otherwise.
+ */
+static inline void diskdump_tasklet_schedule(struct tasklet_struct *tasklet)
+{
+	/* plain calls: a void function must not return an expression */
+	if (crashdump_mode())
+		_diskdump_tasklet_schedule(tasklet);
+	else
+		tasklet_schedule(tasklet);
+}
+
+/*
+ * schedule_work() front end: returns the result of
+ * _diskdump_schedule_work() in crash dump mode and of schedule_work()
+ * otherwise.
+ */
+static inline int diskdump_schedule_work(struct work_struct *work)
+{
+	return crashdump_mode() ? _diskdump_schedule_work(work)
+				: schedule_work(work);
+}
+
+#endif /* _LINUX_DISKDUMPLIB_H */
diff -Nur linux-2.6.6.org/include/linux/kernel.h linux-2.6.6/include/linux/kernel.h
--- linux-2.6.6.org/include/linux/kernel.h 2004-05-20 08:58:59.000000000 +0900
+++ linux-2.6.6/include/linux/kernel.h 2004-05-27 09:52:45.000000000 +0900
@@ -112,6 +112,10 @@
extern int system_state; /* See values below */
extern int tainted;
extern const char *print_tainted(void);
+extern void try_crashdump(struct pt_regs *);
+extern void (*diskdump_func) (struct pt_regs *regs);
+extern int diskdump_mode;
+#define crashdump_mode() (diskdump_mode)
/* Values used for system_state */
#define SYSTEM_BOOTING 0
diff -Nur linux-2.6.6.org/kernel/panic.c linux-2.6.6/kernel/panic.c
--- linux-2.6.6.org/kernel/panic.c 2004-05-20 08:59:23.000000000 +0900
+++ linux-2.6.6/kernel/panic.c 2004-05-27 09:52:45.000000000 +0900
@@ -23,8 +23,10 @@
int panic_timeout;
int panic_on_oops;
int tainted;
+int diskdump_mode = 0;
EXPORT_SYMBOL(panic_timeout);
+EXPORT_SYMBOL_GPL(diskdump_mode);
struct notifier_block *panic_notifier_list;
@@ -60,6 +62,8 @@
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
printk(KERN_EMERG "Kernel panic: %s\n",buf);
+ if (diskdump_func)
+ BUG();
if (in_interrupt())
printk(KERN_EMERG "In interrupt handler - not syncing\n");
else if (!current->pid)
@@ -134,3 +138,22 @@
snprintf(buf, sizeof(buf), "Not tainted");
return(buf);
}
+
+/*
+ * Try crashdump. Diskdump is first, netdump is second.
+ * diskdump_func is cleared before it is called, so if a second panic
+ * occurs while diskdump is running the hook is not entered again and
+ * netdump can handle it.
+ * NOTE(review): no netdump call is visible in this function -- confirm
+ * where the netdump fallback is expected to happen.
+ */
+void try_crashdump(struct pt_regs *regs)
+{
+ void (*func)(struct pt_regs *);
+
+ if (diskdump_func) {
+ /* take a private copy, then clear the hook before calling it */
+ func = diskdump_func;
+ diskdump_func = NULL;
+ func(regs);
+ }
+ if (panic_on_oops)
+ panic("Fatal exception");
+}
^ permalink raw reply [flat|nested] 38+ messages in thread
* [2/4] [PATCH]Diskdump - yet another crash dump function
2004-05-27 9:33 [PATCH]Diskdump - yet another crash dump function Takao Indoh
2004-05-27 12:36 ` [1/4] " Takao Indoh
@ 2004-05-27 12:37 ` Takao Indoh
2004-05-27 13:48 ` Christoph Hellwig
2004-05-27 12:39 ` [3/4] " Takao Indoh
` (3 subsequent siblings)
5 siblings, 1 reply; 38+ messages in thread
From: Takao Indoh @ 2004-05-27 12:37 UTC (permalink / raw)
To: linux-kernel
Hi,
This is a patch for scsi common layer.
Best Regards,
Takao Indoh
diff -Nur linux-2.6.6.org/drivers/scsi/Kconfig linux-2.6.6/drivers/scsi/Kconfig
--- linux-2.6.6.org/drivers/scsi/Kconfig 2004-05-20 08:58:48.000000000 +0900
+++ linux-2.6.6/drivers/scsi/Kconfig 2004-05-27 09:24:46.000000000 +0900
@@ -55,6 +55,12 @@
In this case, do not compile the driver for your SCSI host adapter
(below) as a module either.
+config SCSI_DUMP
+ tristate "SCSI dump support"
+ depends on DISKDUMP && SCSI
+ help
+ SCSI dump support
+
config CHR_DEV_ST
tristate "SCSI tape support"
depends on SCSI
diff -Nur linux-2.6.6.org/drivers/scsi/Makefile linux-2.6.6/drivers/scsi/Makefile
--- linux-2.6.6.org/drivers/scsi/Makefile 2004-05-20 08:58:48.000000000 +0900
+++ linux-2.6.6/drivers/scsi/Makefile 2004-05-27 09:24:46.000000000 +0900
@@ -133,6 +133,8 @@
obj-$(CONFIG_BLK_DEV_SR) += sr_mod.o
obj-$(CONFIG_CHR_DEV_SG) += sg.o
+obj-$(CONFIG_SCSI_DUMP) += scsi_dump.o
+
scsi_mod-y += scsi.o hosts.o scsi_ioctl.o constants.o \
scsicam.o scsi_error.o scsi_lib.o \
scsi_scan.o scsi_syms.o scsi_sysfs.o \
diff -Nur linux-2.6.6.org/drivers/scsi/scsi.c linux-2.6.6/drivers/scsi/scsi.c
--- linux-2.6.6.org/drivers/scsi/scsi.c 2004-05-20 08:58:48.000000000 +0900
+++ linux-2.6.6/drivers/scsi/scsi.c 2004-05-27 09:24:46.000000000 +0900
@@ -691,6 +691,10 @@
{
unsigned long flags;
+#if defined(CONFIG_SCSI_DUMP) || defined(CONFIG_SCSI_DUMP_MODULE)
+ if (crashdump_mode())
+ return;
+#endif
/*
* We don't have to worry about this one timing out any more.
* If we are unable to remove the timer, then the command
diff -Nur linux-2.6.6.org/drivers/scsi/scsi_dump.c linux-2.6.6/drivers/scsi/scsi_dump.c
--- linux-2.6.6.org/drivers/scsi/scsi_dump.c 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.6/drivers/scsi/scsi_dump.c 2004-05-27 09:38:53.000000000 +0900
@@ -0,0 +1,565 @@
+/*
+ * linux/drivers/scsi/scsi_dump.c
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Written by Nobuhiro Tachino (ntachino@jp.fujitsu.com)
+ *
+ * Some codes are derived from drivers/scsi/sd.c
+ */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include <linux/blkdev.h>
+#include <linux/blkpg.h>
+#include "scsi.h"
+#include "scsi_priv.h"
+#include "hosts.h"
+#include "scsi_dump.h"
+#include <scsi/scsi_ioctl.h>
+
+#include <linux/genhd.h>
+#include <linux/utsname.h>
+#include <linux/crc32.h>
+#include <linux/diskdump.h>
+#include <linux/diskdumplib.h>
+#include <linux/delay.h>
+
+#define MAX_RETRIES 5
+#define SD_TIMEOUT (60 * HZ)
+
+#define DEBUG 0
+#if DEBUG
+# define Dbg(x, ...) printk(KERN_INFO "scsi_dump:" x "\n", ## __VA_ARGS__)
+#else
+# define Dbg(x...)
+#endif
+
+#define Err(x, ...) printk(KERN_ERR "scsi_dump: " x "\n", ## __VA_ARGS__);
+#define Warn(x, ...) printk(KERN_WARNING "scsi_dump: " x "\n", ## __VA_ARGS__)
+#define Info(x, ...) printk(x "\n", ## __VA_ARGS__)
+
+/* blocks to 512byte sectors */
+#define BLOCK_SECTOR(s) ((s) << (DUMP_BLOCK_SHIFT - 9))
+
+static int quiesce_ok = 0;
+static Scsi_Cmnd scsi_dump_cmnd;
+static struct request scsi_dump_req;
+static uint32_t module_crc;
+
+/*
+ * Completion callback for dump commands: clears scmd->done, which is the
+ * condition send_command() polls for.
+ */
+static void rw_intr(Scsi_Cmnd * scmd)
+{
+ scmd->done = NULL;
+}
+
+/*
+ * Common code to make Scsi_Cmnd
+ *
+ * Callers in this file clear *scmd and fill in scmd->cmnd[] before calling:
+ * the opcode in cmnd[0] determines cmd_len, and cmnd[] is copied into
+ * data_cmnd[] here.
+ *
+ * sdev: target device
+ * scmd: command being prepared
+ * buf, len: data buffer and its length
+ * direction: SCSI_DATA_* transfer direction
+ * set_lun: when non-zero, OR the LUN into cmnd[1] for devices whose
+ * scsi_level is SCSI_2 or lower
+ */
+static void init_scsi_command(Scsi_Device *sdev, Scsi_Cmnd *scmd, void *buf, int len, unsigned char direction, int set_lun)
+{
+ /* every dump command shares the single static request */
+ scmd->request = &scsi_dump_req;
+ scmd->sc_magic = SCSI_CMND_MAGIC;
+ scmd->owner = SCSI_OWNER_MIDLEVEL;
+ scmd->device = sdev;
+ scmd->buffer = scmd->request_buffer = buf;
+ scmd->bufflen = scmd->request_bufflen = len;
+
+
+ scmd->sc_data_direction = direction;
+
+ /* data_cmnd is copied before the LUN bits are added below */
+ memcpy(scmd->data_cmnd, scmd->cmnd, sizeof(scmd->cmnd));
+ scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
+ scmd->old_cmd_len = scmd->cmd_len;
+
+
+ if (set_lun)
+ scmd->cmnd[1] |= (sdev->scsi_level <= SCSI_2) ?
+ ((sdev->lun << 5) & 0xe0) : 0;
+
+ scmd->transfersize = sdev->sector_size;
+ if (direction == SCSI_DATA_WRITE)
+ scmd->underflow = len;
+
+ scmd->allowed = MAX_RETRIES;
+ scmd->timeout_per_command = SD_TIMEOUT;
+
+ /*
+ * This is the completion routine we use. This is matched in terms
+ * of capability to this function.
+ */
+ scmd->done = rw_intr;
+}
+
+/*
+ * Prepare scmd as a MODE SENSE for page 8 (cache) that reads into buf.
+ * buf is passed on with a length of 256 bytes.
+ */
+static void init_mode_sense_command(Scsi_Device *sdev, Scsi_Cmnd *scmd, void *buf)
+{
+ memset(scmd, 0, sizeof(*scmd));
+ scmd->cmnd[0] = MODE_SENSE;
+ scmd->cmnd[1] = 0x00; /* DBD=0 */
+ scmd->cmnd[2] = 0x08; /* PCF=0 Page 8(Cache) */
+ scmd->cmnd[4] = 255; /* one less than the 256 passed as buffer length */
+
+ /* must come last: it reads cmnd[] to derive cmd_len and data_cmnd */
+ init_scsi_command(sdev, scmd, buf, 256, SCSI_DATA_READ, 1);
+}
+
+/*
+ * Prepare scmd as a MODE SELECT that writes len bytes from buf.
+ * len is also stored in cmnd[4], a single byte of the command.
+ */
+static void init_mode_select_command(Scsi_Device *sdev, Scsi_Cmnd *scmd, void *buf, int len)
+{
+ memset(scmd, 0, sizeof(*scmd));
+ scmd->cmnd[0] = MODE_SELECT;
+ scmd->cmnd[1] = 0x10; /* PF=1 SP=0 */
+ scmd->cmnd[4] = len;
+
+ /* must come last: it reads cmnd[] to derive cmd_len and data_cmnd */
+ init_scsi_command(sdev, scmd, buf, len, SCSI_DATA_WRITE, 1);
+}
+
+/*
+ * Prepare scmd as a SYNCHRONIZE CACHE with no data buffer. Unlike the
+ * other command builders here, set_lun is passed as 0.
+ */
+static void init_sync_command(Scsi_Device *sdev, Scsi_Cmnd * scmd)
+{
+ memset(scmd, 0, sizeof(*scmd));
+ scmd->cmnd[0] = SYNCHRONIZE_CACHE;
+
+ init_scsi_command(sdev, scmd, NULL, 0, SCSI_DATA_NONE, 0);
+}
+
+/*
+ * Prepare scmd as a REQUEST SENSE that reads into buf.
+ * buf is passed on with a length of 256 bytes.
+ */
+static void init_sense_command(Scsi_Device *sdev, Scsi_Cmnd *scmd, void *buf)
+{
+ memset(scmd, 0, sizeof(*scmd));
+ scmd->cmnd[0] = REQUEST_SENSE;
+ scmd->cmnd[4] = 255; /* one less than the 256 passed as buffer length */
+
+ init_scsi_command(sdev, scmd, buf, 256, SCSI_DATA_READ, 1);
+}
+
+/*
+ * Build a READ_10/WRITE_10 command for the dump partition.
+ *
+ * dump_part: partition dumped to; supplies start_sect and nr_sects
+ * sdev:      target device
+ * scmd:      command to fill in (cleared first)
+ * rw:        READ or WRITE
+ * block:     start, in 512-byte sectors relative to the partition
+ * buf, len:  data buffer and its length in bytes
+ *
+ * Returns 0 on success, -EFBIG if the range runs past the end of the
+ * partition, -EROFS if a write is requested on a device that is not
+ * writeable, or -EINVAL for an unknown rw value.
+ */
+static int init_rw_command(struct disk_dump_partition *dump_part, Scsi_Device *sdev, Scsi_Cmnd * scmd, int rw, int block, void *buf, unsigned int len)
+{
+	int this_count = len >> 9;
+
+	memset(scmd, 0, sizeof(*scmd));
+
+	if (block + this_count > dump_part->nr_sects) {
+		Err("block number %d is larger than %lu",
+		    block + this_count, dump_part->nr_sects);
+		return -EFBIG;
+	}
+
+	/* from here on block is relative to the whole disk */
+	block += dump_part->start_sect;
+
+	/*
+	 * If we have a 1K hardware sectorsize, prevent access to single
+	 * 512 byte sectors. In theory we could handle this - in fact
+	 * the scsi cdrom driver must be able to handle this because
+	 * we typically use 1K blocksizes, and cdroms typically have
+	 * 2K hardware sectorsizes. Of course, things are simpler
+	 * with the cdrom, since it is read-only. For performance
+	 * reasons, the filesystems should be able to handle this
+	 * and not force the scsi disk driver to use bounce buffers
+	 * for this.
+	 */
+	if (sdev->sector_size == 1024) {
+		block = block >> 1;
+		this_count = this_count >> 1;
+	}
+	if (sdev->sector_size == 2048) {
+		block = block >> 2;
+		this_count = this_count >> 2;
+	}
+	if (sdev->sector_size == 4096) {
+		block = block >> 3;
+		this_count = this_count >> 3;
+	}
+
+	switch (rw) {
+	case WRITE:
+		if (!sdev->writeable) {
+			/*
+			 * This has to be an error: on a 0 return the
+			 * caller sends the still-empty command.
+			 */
+			Err("media is not writable");
+			return -EROFS;
+		}
+		scmd->cmnd[0] = WRITE_10;
+		break;
+	case READ:
+		scmd->cmnd[0] = READ_10;
+		break;
+	default:
+		Err("Unknown command %d", rw);
+		return -EINVAL;
+	}
+
+	/* the transfer length field of a 10-byte command is 16 bits wide */
+	if (this_count > 0xffff)
+		this_count = 0xffff;
+
+	scmd->cmnd[2] = (unsigned char) (block >> 24) & 0xff;
+	scmd->cmnd[3] = (unsigned char) (block >> 16) & 0xff;
+	scmd->cmnd[4] = (unsigned char) (block >> 8) & 0xff;
+	scmd->cmnd[5] = (unsigned char) block & 0xff;
+	scmd->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff;
+	scmd->cmnd[8] = (unsigned char) this_count & 0xff;
+
+	init_scsi_command(sdev, scmd, buf, len,
+			  (rw == WRITE ? SCSI_DATA_WRITE : SCSI_DATA_READ), 1);
+	return 0;
+}
+
+/*
+ * Classify a completed command from its result and from what
+ * scsi_decide_disposition() says about it.
+ *
+ * Return code
+ *  > 0: the command should be sent again
+ *  = 0: the command succeeded
+ *  < 0: the command failed (-EIO)
+ */
+static int cmd_result(Scsi_Cmnd *scmd)
+{
+	int status = status_byte(scmd->result);
+	int disposition = scsi_decide_disposition(scmd);
+
+	if (disposition == NEEDS_RETRY || disposition == ADD_TO_MLQUEUE)
+		return 1 /* retry */;
+
+	if (disposition != FAILED && disposition != SUCCESS) {
+		Err("bad disposition: %d", scmd->result);
+		return -EIO;
+	}
+
+	/* SUCCESS still needs a clean host byte and an acceptable status */
+	if (disposition == SUCCESS && host_byte(scmd->result) == DID_OK) {
+		if (status == GOOD || status == INTERMEDIATE_GOOD
+		    || status == INTERMEDIATE_C_GOOD)
+			return 0;
+		if (status == CHECK_CONDITION && scmd->sense_buffer[2] == RECOVERED_ERROR)
+			return 0;
+	}
+
+	Err("command %x failed with 0x%x", scmd->cmnd[0], scmd->result);
+	return -EIO;
+}
+
+/*
+ * Issue scmd to the host adapter and poll until it completes, repeating
+ * for as long as cmd_result() asks for a retry.
+ *
+ * Returns 0 on success, -EIO if the device is not online or has been
+ * changed, or the negative value from cmd_result().
+ *
+ * NOTE(review): the return value of queuecommand() is ignored and the
+ * polling loop has no timeout, so it only ends once rw_intr() has cleared
+ * scmd->done -- confirm that a rejected command cannot hang the dump.
+ */
+static int send_command(Scsi_Cmnd *scmd)
+{
+ struct Scsi_Host *host = scmd->device->host;
+ Scsi_Device *sdev = scmd->device;
+ int ret;
+
+ do {
+ if (!scsi_device_online(sdev)) {
+ Err("Scsi disk is not online");
+ return -EIO;
+ }
+ if (sdev->changed) {
+ Err("SCSI disk has been changed. Prohibiting further I/O");
+ return -EIO;
+ }
+
+ spin_lock(host->host_lock);
+ host->hostt->queuecommand(scmd, rw_intr);
+ spin_unlock(host->host_lock);
+
+ /* poll the adapter until rw_intr() has set scmd->done to NULL */
+ while (scmd->done != NULL) {
+ host->hostt->dump_ops->poll(scmd->device);
+ udelay(100);
+ diskdump_update();
+ }
+ /* re-arm the completion marker in case the command is sent again */
+ scmd->done = rw_intr;
+ } while ((ret = cmd_result(scmd)) > 0);
+
+ return ret;
+}
+
+/*
+ * If Write Cache Enable of the disk device is not set, write I/O takes a
+ * very long time. So enable WCE temporarily and issue SYNCHRONIZE CACHE
+ * after all write I/Os are done. The following system reboot will reset
+ * the WCE bit to its original value.
+ *
+ * Failures are only logged; the dump goes on without the write cache.
+ */
+static void
+enable_write_cache(Scsi_Device *sdev)
+{
+	char buf[256];
+	int ret;
+	int data_len;
+
+	Dbg("enable write cache");
+	memset(buf, 0, sizeof(buf));
+
+	init_mode_sense_command(sdev, &scsi_dump_cmnd, buf);
+	if ((ret = send_command(&scsi_dump_cmnd)) < 0) {
+		/* Warn() already supplies the KERN_WARNING level */
+		Warn("MODE SENSE failed");
+		return;
+	}
+
+	if (buf[14] & 0x04)	/* WCE is already set */
+		return;
+
+	/*
+	 * Data length in mode parameter header. Read it as unsigned: plain
+	 * char may be signed, which would turn lengths above 127 negative.
+	 */
+	data_len = (unsigned char)buf[0] + 1;
+	buf[0] = 0;
+	buf[1] = 0;
+	buf[2] = 0;
+	buf[12] &= 0x7f;	/* clear PS */
+	buf[14] |= 0x04;	/* set WCE */
+
+	init_mode_select_command(sdev, &scsi_dump_cmnd, buf, data_len);
+	if ((ret = send_command(&scsi_dump_cmnd)) < 0) {
+		Warn("MODE SELECT failed");
+
+		/* issue a REQUEST SENSE after the failed MODE SELECT */
+		init_sense_command(sdev, &scsi_dump_cmnd, buf);
+		if ((ret = send_command(&scsi_dump_cmnd)) < 0) {
+			Err("sense failed");
+		}
+	}
+}
+
+/*
+ * Check whether the dump device is sane enough to handle I/O.
+ *
+ * Checks, in order: the module checksum, that the device is online and
+ * unchanged, the adapter's optional sanity_check hook, and whether the
+ * host lock is currently held.
+ *
+ * Return value:
+ * 0: the device is ok
+ * < 0: the device is not ok
+ * > 0: Cannot determine
+ */
+static int
+scsi_dump_sanity_check(struct disk_dump_device *dump_device)
+{
+ Scsi_Device *sdev = dump_device->device;
+ struct Scsi_Host *host = sdev->host;
+ int adapter_sanity = 0;
+ int sanity = 0;
+
+ if (!check_crc_module()) {
+ Err("checksum error. scsi dump module may be compromised.");
+ return -EINVAL;
+ }
+ if (!scsi_device_online(sdev)) {
+ Warn("device not online: host %d channel %d id %d lun %d",
+ host->host_no, sdev->channel, sdev->id, sdev->lun);
+ return -EIO;
+ }
+ if (sdev->changed) {
+ Err("SCSI disk has been changed. Prohibiting further I/O: host %d channel %d id %d lun %d",
+ host->host_no, sdev->channel, sdev->id, sdev->lun);
+ return -EIO;
+ }
+
+ if (host->hostt->dump_ops->sanity_check) {
+ adapter_sanity = host->hostt->dump_ops->sanity_check(sdev);
+ if (adapter_sanity < 0) {
+ Warn("adapter status is not sane");
+ return adapter_sanity;
+ }
+ }
+
+ /*
+ * If host's spinlock is already taken, assume it's part
+ * of crash and skip it. A held default_lock is reported as
+ * "cannot determine" (1); any other held lock is an error.
+ */
+ if (!spin_is_locked(host->host_lock)) {
+ sanity = 0;
+ } else {
+ Warn("host_lock is held: host %d channel %d id %d lun %d",
+ host->host_no, sdev->channel, sdev->id, sdev->lun);
+ if (host->host_lock == &host->default_lock)
+ sanity = 1;
+ else
+ return -EIO;
+ }
+ return sanity + adapter_sanity;
+}
+
+/*
+ * Reset the adapter before dumping. The host reset handler is preferred;
+ * when the host template has none, its bus reset handler is used; with
+ * neither, nothing is done and 0 is returned. After a successful reset,
+ * wait out the settle time while polling diskdump_update(), then issue a
+ * REQUEST SENSE.
+ *
+ * Returns 0 on success or when no handler exists, -EIO on failure.
+ */
+static int scsi_dump_reset(Scsi_Device *sdev)
+{
+	struct Scsi_Host *host = sdev->host;
+	Scsi_Host_Template *hostt = host->hostt;
+	int (*reset_handler)(Scsi_Cmnd *);
+	char buf[256];
+	int msec;
+	int ret;
+
+	init_sense_command(sdev, &scsi_dump_cmnd, buf);
+
+	reset_handler = hostt->eh_host_reset_handler;
+	if (!reset_handler)
+		reset_handler = hostt->eh_bus_reset_handler;
+	if (!reset_handler)
+		return 0;
+
+	/* the chosen handler runs with the host lock held */
+	spin_lock(host->host_lock);
+	ret = reset_handler(&scsi_dump_cmnd);
+	spin_unlock(host->host_lock);
+
+	if (ret != SUCCESS) {
+		Err("adapter reset failed");
+		return -EIO;
+	}
+
+	/* bus reset settle time. 5sec for old disk devices */
+	for (msec = 0; msec < 5000; msec++) {
+		diskdump_update();
+		mdelay(1);
+	}
+
+	Dbg("request sense");
+	ret = send_command(&scsi_dump_cmnd);
+	if (ret < 0) {
+		Err("sense failed");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Prepare the device for dumping: run the adapter's optional quiesce hook,
+ * reset the adapter, enable the write cache on SCSI-2 or later devices and
+ * set quiesce_ok so that scsi_dump_rw_block() accepts I/O.
+ * Returns 0 on success, or the negative error from the hook or the reset.
+ */
+static int
+scsi_dump_quiesce(struct disk_dump_device *dump_device)
+{
+ Scsi_Device *sdev = dump_device->device;
+ struct Scsi_Host *host = sdev->host;
+ int ret;
+
+ if (host->hostt->dump_ops->quiesce) {
+ ret = host->hostt->dump_ops->quiesce(sdev);
+ if (ret < 0)
+ return ret;
+ }
+
+ Dbg("do bus reset");
+ if ((ret = scsi_dump_reset(sdev)) < 0)
+ return ret;
+
+ if (sdev->scsi_level >= SCSI_2)
+ enable_write_cache(sdev);
+
+ quiesce_ok = 1;
+ return 0;
+}
+
+/*
+ * Read or write len dump blocks of dump_part, starting at dump block
+ * dump_block_nr, from or to buf.
+ * Fails with -EIO unless scsi_dump_quiesce() has completed.
+ * Returns the init_rw_command() error or the send_command() result.
+ */
+static int scsi_dump_rw_block(struct disk_dump_partition *dump_part, int rw, unsigned long dump_block_nr, void *buf, int len)
+{
+ struct disk_dump_device *dump_device = dump_part->device;
+ Scsi_Device *sdev = dump_device->device;
+ /* dump block number converted to 512-byte sectors */
+ int block_nr = BLOCK_SECTOR(dump_block_nr);
+ int ret;
+
+ if (!quiesce_ok) {
+ Err("quiesce not called");
+ return -EIO;
+ }
+
+ ret = init_rw_command(dump_part, sdev, &scsi_dump_cmnd, rw,
+ block_nr, buf, DUMP_BLOCK_SIZE * len);
+ if (ret < 0) {
+ Err("init_rw_command failed");
+ return ret;
+ }
+ return send_command(&scsi_dump_cmnd);
+}
+
+/*
+ * Finish dumping: send SYNCHRONIZE CACHE on SCSI-2 or later devices (its
+ * result is not checked) and call the adapter's optional shutdown hook.
+ * Returns the hook's return value, or 0 when there is no hook.
+ */
+static int
+scsi_dump_shutdown(struct disk_dump_device *dump_device)
+{
+ Scsi_Device *sdev = dump_device->device;
+ struct Scsi_Host *host = sdev->host;
+
+ if (sdev->scsi_level >= SCSI_2) {
+ init_sync_command(sdev, &scsi_dump_cmnd);
+ send_command(&scsi_dump_cmnd);
+ }
+
+ if (host->hostt->dump_ops->shutdown)
+ return host->hostt->dump_ops->shutdown(sdev);
+
+ return 0;
+}
+
+/*
+ * Look up the SCSI device behind dev.
+ * Returns the device when one is found and its host template provides
+ * dump_ops, NULL otherwise.
+ */
+static void *scsi_dump_probe(dev_t dev)
+{
+	Scsi_Device *sdev = sd_find_scsi_device(dev);
+
+	if (sdev != NULL && sdev->host->hostt->dump_ops)
+		return sdev;
+
+	return NULL;
+}
+
+
+struct disk_dump_device_ops scsi_dump_device_ops = {
+ .sanity_check = scsi_dump_sanity_check,
+ .rw_block = scsi_dump_rw_block,
+ .quiesce = scsi_dump_quiesce,
+ .shutdown = scsi_dump_shutdown,
+};
+
+/*
+ * Attach a SCSI device as a dump device: take a reference on it, install
+ * the SCSI dump operations and, when the host reports max_sectors, derive
+ * max_blocks from it.
+ *
+ * Returns 0 on success, -ENOTSUPP when the host template has no dump_ops,
+ * or the error from scsi_device_get() when no reference could be taken.
+ */
+static int scsi_dump_add_device(struct disk_dump_device *dump_device)
+{
+	Scsi_Device *sdev;
+	int ret;
+
+	sdev = dump_device->device;
+	if (!sdev->host->hostt->dump_ops)
+		return -ENOTSUPP;
+
+	/*
+	 * Without a reference the device must not be registered:
+	 * scsi_dump_remove_device() drops one unconditionally.
+	 */
+	ret = scsi_device_get(sdev);
+	if (ret)
+		return ret;
+
+	memcpy(&dump_device->ops, &scsi_dump_device_ops, sizeof(scsi_dump_device_ops));
+	if (sdev->host->max_sectors) {
+		dump_device->max_blocks = (sdev->sector_size * sdev->host->max_sectors) >> DUMP_BLOCK_SHIFT;
+	}
+	return 0;
+}
+
+/* Drop the device reference taken by scsi_dump_add_device(). */
+static void scsi_dump_remove_device(struct disk_dump_device *dump_device)
+{
+ Scsi_Device *sdev = dump_device->device;
+
+ scsi_device_put(sdev);
+}
+
+static struct disk_dump_type scsi_dump_type = {
+ .probe = scsi_dump_probe,
+ .add_device = scsi_dump_add_device,
+ .remove_device = scsi_dump_remove_device,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Module init: register the SCSI dump type, then record the module
+ * checksum that scsi_dump_sanity_check() verifies later.
+ * Returns the register_disk_dump_type() result.
+ */
+static int init_scsi_dump(void)
+{
+ int ret;
+
+ if ((ret = register_disk_dump_type(&scsi_dump_type)) < 0) {
+ Err("register failed");
+ return ret;
+ }
+ set_crc_modules();
+ return ret;
+}
+
+/* Module exit: unregister the SCSI dump type and log a failure to do so. */
+static void cleanup_scsi_dump(void)
+{
+	if (unregister_disk_dump_type(&scsi_dump_type) < 0)
+		Err("unregister failed");
+}
+
+module_init(init_scsi_dump);
+module_exit(cleanup_scsi_dump);
+MODULE_LICENSE("GPL");
diff -Nur linux-2.6.6.org/drivers/scsi/scsi_dump.h linux-2.6.6/drivers/scsi/scsi_dump.h
--- linux-2.6.6.org/drivers/scsi/scsi_dump.h 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.6/drivers/scsi/scsi_dump.h 2004-05-27 09:31:07.000000000 +0900
@@ -0,0 +1,38 @@
+#ifndef _SCSI_DUMP_H
+#define _SCSI_DUMP_H
+
+/*
+ * linux/drivers/scsi/scsi_dump.h
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Written by Nobuhiro Tachino (ntachino@jp.fujitsu.com)
+ *
+ */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+/*
+ * Per-adapter dump hooks, reached through hostt->dump_ops.
+ * scsi_dump.c NULL-checks sanity_check, quiesce and shutdown before
+ * calling them, and calls poll unconditionally while it waits for a
+ * command to complete.
+ */
+struct scsi_dump_ops {
+ int (*sanity_check)(Scsi_Device *);
+ int (*quiesce)(Scsi_Device *);
+ int (*shutdown)(Scsi_Device *);
+ void (*poll)(Scsi_Device *);
+};
+
+/* sd.c function */
+extern Scsi_Device *sd_find_scsi_device(dev_t);
+
+#endif /* _SCSI_DUMP_H */
diff -Nur linux-2.6.6.org/drivers/scsi/scsi_error.c linux-2.6.6/drivers/scsi/scsi_error.c
--- linux-2.6.6.org/drivers/scsi/scsi_error.c 2004-05-20 08:58:48.000000000 +0900
+++ linux-2.6.6/drivers/scsi/scsi_error.c 2004-05-27 09:24:46.000000000 +0900
@@ -402,6 +402,10 @@
**/
static void scsi_eh_done(struct scsi_cmnd *scmd)
{
+#if defined(CONFIG_SCSI_DUMP) || defined(CONFIG_SCSI_DUMP_MODULE)
+ if (crashdump_mode())
+ return;
+#endif
/*
* if the timeout handler is already running, then just set the
* flag which says we finished late, and return. we have no
diff -Nur linux-2.6.6.org/drivers/scsi/scsi_syms.c linux-2.6.6/drivers/scsi/scsi_syms.c
--- linux-2.6.6.org/drivers/scsi/scsi_syms.c 2004-05-20 08:58:48.000000000 +0900
+++ linux-2.6.6/drivers/scsi/scsi_syms.c 2004-05-27 09:24:46.000000000 +0900
@@ -25,6 +25,7 @@
#include "scsi.h"
#include "scsi_logging.h"
+#include "scsi_priv.h"
/*
@@ -107,3 +108,5 @@
*/
EXPORT_SYMBOL(scsi_add_timer);
EXPORT_SYMBOL(scsi_delete_timer);
+
+EXPORT_SYMBOL(scsi_decide_disposition);
diff -Nur linux-2.6.6.org/drivers/scsi/sd.c linux-2.6.6/drivers/scsi/sd.c
--- linux-2.6.6.org/drivers/scsi/sd.c 2004-05-20 08:58:48.000000000 +0900
+++ linux-2.6.6/drivers/scsi/sd.c 2004-05-27 09:24:46.000000000 +0900
@@ -192,6 +192,21 @@
up(&sd_ref_sem);
}
+#if defined(CONFIG_DISKDUMP) || defined(CONFIG_DISKDUMP_MODULE)
+/*
+ * Map a dev_t to the Scsi_Device behind its gendisk. Returns NULL when no
+ * gendisk is found or when it has no private_data.
+ * NOTE(review): the gendisk returned by get_gendisk() is not released
+ * here -- confirm whether get_gendisk() takes a reference that needs a
+ * matching put_disk().
+ */
+Scsi_Device *sd_find_scsi_device(dev_t dev)
+{
+ struct gendisk *disk;
+ int part;
+ disk = get_gendisk(dev, &part);
+ if(disk && disk->private_data)
+ return scsi_disk(disk)->device;
+ else
+ return NULL;
+}
+
+EXPORT_SYMBOL(sd_find_scsi_device);
+#endif
+
/**
* sd_init_command - build a scsi (read or write) command from
* information in the request structure.
diff -Nur linux-2.6.6.org/include/scsi/scsi_host.h linux-2.6.6/include/scsi/scsi_host.h
--- linux-2.6.6.org/include/scsi/scsi_host.h 2004-05-20 08:59:07.000000000 +0900
+++ linux-2.6.6/include/scsi/scsi_host.h 2004-05-27 09:28:49.000000000 +0900
@@ -345,6 +345,11 @@
* module_init/module_exit.
*/
struct list_head legacy_hosts;
+
+ /*
+ * operations for dump
+ */
+ struct scsi_dump_ops *dump_ops;
};
/*
^ permalink raw reply [flat|nested] 38+ messages in thread
* [Document][PATCH]Diskdump - yet another crash dump function
2004-05-27 9:33 [PATCH]Diskdump - yet another crash dump function Takao Indoh
` (3 preceding siblings ...)
2004-05-27 12:40 ` [4/4] " Takao Indoh
@ 2004-05-27 13:34 ` Takao Indoh
2004-06-03 13:10 ` [PATCH]Diskdump " Pavel Machek
5 siblings, 0 replies; 38+ messages in thread
From: Takao Indoh @ 2004-05-27 13:34 UTC (permalink / raw)
To: linux-kernel
This is a document for diskdump including overview, installation, and so
on.
Best Regards,
Takao Indoh
Introduction
------------
Diskdump offers a function to preserve a so-called crash dump.
When a "panic" or "oops" happens, diskdump automatically saves system
memory to the disk. We can investigate the cause of the panic using this
saved memory image, which we call a crash dump.
Overview
-------
- How it works
This is 2-stage dump which is similar to traditional UNIX dump. The
1st stage starts when panic occurs, at which time the register state
and other dump-related data is stored in a header, followed by the
full contents of memory, in a dedicated dump partition. The dump
partition will have been pre-formatted with per-block signatures.
The 2nd stage is executed by rc script after the next system reboot,
at which time the savecore command will create the vmcore file from
the contents of the dump partition. After that, the per-block
signatures will be re-written over the dump partition, in
preparation for the next panic.
The handling of panic is essentially the same as netdump. It
inhibits interrupts, freezes all other CPUs, and then for each page
of data, issues the I/O command to the host adapter driver, followed
by calling the interrupt handler of the adapter driver iteratively
until the I/O has completed. The difference compared to netdump is
that diskdump saves memory to the dump partition in its own loop,
and does not wait for instructions from an external entity.
- Safety
When diskdump is executed, of course the system is in serious
trouble. Therefore, there is a possibility that user resources on
the disk containing the dump partition could be corrupted. To avoid
this danger, signatures are written over the complete dump
partition. When a panic occurs, the diskdump module reads the whole
dump partition, and checks if the signatures are written correctly.
If the signatures match, the diskdump presumes that it will be
writing to the correct device, and that there is a high possibility
that it will be able to write the dump correctly. The signatures
should be values that are unlikely to occur on the disk by chance.
We decided that the following format will be written in each
one-block (page-size) unit on the partition. The signatures are created
by a simple formula based on the block number, making it very
unlikely that a created signature would ever be the same as a
user resource:
32-bit word 0: "disk"
32-bit word 1: "dump"
32-bit word 2: block number
32-bit word 3: (block number+3)*11
32-bit word 4: ((word 3)+3)*11
32-bit word 5: ((word 4)+3)*11
32-bit word 6: ((word 5)+3)*11
...
32-bit word 1023: ((word 1022)+3)*11
The diskdump module also verifies that its code and data contents
have not been corrupted. The dump module computes CRC value of its
module at the point that dump device is registered, and saves it.
When panic occurs, the dump module re-computes the CRC value at that
point and compares with the saved value. If the values aren't the
same, the dump knows that it has been damaged and aborts the dump
process.
- Reliability
After panic occurs, I/O is executed by the diskdump module calling
the queuecommand() function of the host adapter driver, and by
polling the interrupt handler directly. The dump is executed by
diskdump module and host adapter driver only. It is executed without
depending on other components of kernel, so it works even when panic
occurs in interrupt context. (XXX To be exact, a couple of drivers
are not finished completely, because they call kmalloc() as an
extension of queuecommand())
In SCSI, a host reset is executed first, so it is possible to dump
with a stable bus condition. In a couple of drivers, especially in
the host reset process, timers and tasklets may be used. For these
drivers, I created a helper library to emulate these functions. The
helper library executes timer and tasklet functionality, which helps
to minimize the modification required to support diskdump. The size
of initrd increases slightly because the driver depends upon the
helper library.
Multiple dump devices can be registered. When a panic occurs, the
diskdump module checks each dump device condition and selects the
first sane device that it finds.
Diskdump and netdump can co-exist. When both modules are
enabled, the diskdump works in preference to the netdump. If the
signature checking fails, if a disk I/O error occurs, or if a double
panic occurs in the diskdump process, it falls back to netdump.
- The architectures and drivers to be supported
IA32 only is supported. Regarding drivers, aic7xxx, aic79xx and
qla1280 are supported. I will support some qlogic drivers later.
The modification of supported drivers is needed, but the changes are
very small if they are SCSI drivers.
- The consistency with the netdump
The format of the saved vmcore file is completely the same as the
one which is created by the netdump-server. The vmcore file created
by the savecore command can be read by the existing crash utility.
The saved directory is /var/crash/127.0.0.1-<DATE> which is
consistent with the netdump. 127.0.0.1 is an IP address which the
netdump can never use, so there is no conflict. Our savecore
command also calls /var/crash/scripts/netdump-nospace script as does
the netdump-server daemon.
- Binary compatibility
The binary compatibility is assured because of no change of the size
of the existing structures and the arguments of existing functions.
- Impact to kernel
The host adapter driver needs to be modified to support diskdump,
but the required steps are small. For example, the modification
patch for the aic7xxx/aic79xx drivers contains 100 lines for each.
At a minimum, a poll handler needs to be added, which is called from
diskdump to handle I/O completion. If the adapter driver does not
use timers or tasklets, that's all that is required. Even if timers
or tasklets are used, it only requires a small amount of code from
the emulation library.
Similar to netdump, the variable diskdump_mode, the diskdump_func
hook, and the diskdump_register_hook() function have been created to
register diskdump functionality.
The function sd_find_scsi_device(), which gets the Scsi_Device
structure for the selected device from its dev_t, is added to the sd
module and is exported.
To check the result code of Scsi_Cmnd, scsi_decide_disposition() is
also exported.
scsi_done() and scsi_eh_done() discard Scsi_Cmnds when
diskdump_mode is set. With this implementation, extra processing
can be avoided in the extension of outstanding completion of
Scsi_Cmnd, which is completed in the extension of host reset process.
This is the only overhead to be added to the main route. It's
simply an addition of "if unlikely(diskdump_mode) return", so the
overhead is negligible.
Internal structure
------------------
- The interface between diskdump.o and scsi_dump.o
scsi_dump.o is the diskdump driver for SCSI, and it registers itself
to diskdump.o. (The diskdump drivers for IDE or SATA, if and when
they are created, would also register themselves to diskdump.o.)
scsi_mod.o defines the following structures:
struct disk_dump_type {
request_queue_t *(*probe)(kdev_t);
int (*add_device)(struct disk_dump_device *);
void (*remove_device)(struct disk_dump_device *);
struct module *owner;
list_t list;
};
static struct disk_dump_type scsi_dump_type = {
.probe = scsi_dump_probe,
.add_device = scsi_dump_add_device,
.remove_device = scsi_dump_remove_device,
.owner = THIS_MODULE,
};
scsi_dump registers them by register_disk_dump_type().
The probe() handler is called from diskdump.o to determine whether
the selected kdev_t belongs to scsi_mod.o. If it returns 0 to
probe(), diskdump.o creates a disk_dump_device structure and calls
add_device(). The add_device() handler of scsi_dump.o populates the
disk_dump_device_ops of the disk_dump_device. disk_dump_device_ops
is the set of handlers which are called from diskdump.o when panic
occurs:
struct disk_dump_device_ops {
int (*sanity_check)(struct disk_dump_device *);
int (*quiesce)(struct disk_dump_device *);
int (*shutdown)(struct disk_dump_device *);
int (*rw_block)(struct disk_dump_partition *, int rw, unsigned long
block_nr, void *buf);
};
The handler functions are only called when a panic occurs.
sanity_check() checks if the selected device works normally. A
device which returns an error status will not be selected as the
dump device.
quiesce() is called after the device is selected as the dump device.
If it is SCSI, host reset is executed and Write Cache Enable bit of
the disk device is temporarily set for the dump operation.
shutdown() is called after dump is completed. If it is SCSI,
"SYNCHRONIZE CACHE" command is issued to the disk.
rw_block() executes I/O in one block unit. The length of data is a
page size, and is guaranteed to be physically contiguous. In
scsi_dump.o, it issues I/O by calling the queuecommand() handler
from the rw_block() handler. The poll handler of adapter driver is
called until the I/O has completed.
- The interface between scsi_dump.o and the adapter driver
The SCSI adapter which supports the diskdump prepares the following
structure:
struct scsi_dump_ops {
int (*sanity_check)(Scsi_Device *);
int (*quiesce)(Scsi_Device *);
int (*shutdown)(Scsi_Device *);
void (*poll)(Scsi_Device *);
};
The poll function should call the interrupt handler. It is called
repeatedly after queuecommand() is issued, and until the command is
completed.
The other handlers are called by the handlers in scsi_dump.o which
have the same names.
The adapter driver should set its own scsi_dump_ops to dump_ops
field in the scsi_host_template.
struct scsi_host_template {
(snipped)
/*
* operations for dump
*/
struct scsi_dump_ops *dump_ops;
};
Supported Hardware
------------------
Currently, diskdump supports only SCSI disks (aic7xxx/aic79xx). Please
see the README in diskdumputils-0.1.5.tar.bz2 for details.
Installation
------------
1) Download software
1. Linux kernel version 2.6.6
linux-2.6.6.tar.bz2 can be downloaded from
ftp://ftp.kernel.org/pub/linux/kernel/v2.6/
2. diskdump kernel patch
diskdump-0.1.tar.gz can be downloaded from the project page.
http://sourceforge.net/projects/lkdump
3. diskdumputils
diskdumputils-0.1.5.tar.bz2 can be downloaded from the project page.
4. crash command
Download from here: ftp://people.redhat.com/anderson/
2) Build and Install Kernel
1. Untar Linux kernel source
tar -xjvf linux-2.6.6.tar.bz2
2. Apply all patches in the diskdump-0.1.tar.gz
3. Kernel Configuration
a. make menuconfig
b. Under "Device Drivers"-> "Block devices", select the following:
i. Select "m" for "Disk dump support".
c. Under "Device Drivers"-> "SCSI device support", select the
following:
i. Select "m" for "SCSI dump support".
d. Under "Kernel hacking", select the following:
i. Select "y" for "Kernel debugging".
ii. Select "y" for "Magic SysRq key". (optional)
iii. Select "y" for "Compile the kernel with debug info".
e. Configure other kernel config settings as needed.
4. make
5. make modules_install
6. Build an initrd if needed
7. Copy the kernel image to the boot directory
ex. cp arch/i386/boot/bzImage /boot/vmlinuz-2.6.6-diskdump
8. Reboot
3) Build and Install diskdumputils
1. Untar diskdumputils package
tar -xjvf diskdumputils-0.1.5.tar.bz2
2. make
3. make install
4) Setup
The setup procedure is as follows. First a dump device must be
selected. Either a whole device or a partition is fine. The dump
device is wholly formatted for dump, so it cannot be shared with a file
system or used as a swap partition. The size of the dump device should be
big enough to save the whole dump. The size to be written by the
dump is the size of whole memory plus a header field. To determine
the exact size, refer to the output kernel message after the
diskdump module is loaded:
# modprobe diskdump
# dmesg | tail
header blocks: 3
bitmap blocks: 8
total number of memory blocks: 261999
total blocks written: 262010
The last number is the data size in pagesize units that will be
written by the diskdump function.
Select the dump partition in /etc/sysconfig/diskdump, as in the
following example:
-------------------
DEVICE=/dev/sde1
-------------------
Next, format the dump partition. The administrator needs to execute
this once.
# service diskdump initialformat
Lastly, enable the service:
# chkconfig diskdump on
# service diskdump start
If /proc/diskdump exists, and it shows the registered dump device,
the diskdump has been activated:
# cat /proc/diskdump
/dev/sde1 514080 1012095
To test the diskdump, use Alt-SysRq-C or "echo c >
/proc/sysrq-trigger". After completing the dump, a vmcore file will
be created during the next reboot sequence, and saved in a directory of
the name format:
/var/crash/127.0.0.1-<date>
The dump format is the same as the netdump one, so we can use the crash
command to analyse it. The crash command can be downloaded from
ftp://people.redhat.com/anderson/.
# crash vmlinux vmcore
^ permalink raw reply [flat|nested] 38+ messages in thread