LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* 100% iowait on one of cpus in current -git
@ 2007-10-22  6:22 Maxim Levitsky
  2007-10-22  9:11 ` Paolo Ornati
  2007-10-22  9:41 ` Peter Zijlstra
  0 siblings, 2 replies; 61+ messages in thread
From: Maxim Levitsky @ 2007-10-22  6:22 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1018 bytes --]

Hi,

I found a bug in current -git:

On my system one of the CPUs stays at 100% in iowait mode (I have a Core 2 Duo).
Otherwise the system works OK, no disk activity and/or slowdown.
Suspecting that this is a swap-related problem, I tried to turn swap off, but it doesn't affect anything.
It is probably some accounting bug.

If I start with init=/bin/bash, then this disappears.
I then tried starting the usual /etc/init.d scripts, and the first one to show this bug was gpm.
But then I rebooted the system to X without gpm, and I still see 100% iowait.

No additional messages in dmesg.

I tried to bisect this, but eventually I ran into other bugs that cause the system to oops early.


This is a very rough estimate of the bug location:


HEAD
......
c8f30ae54714abf494d79826d90b5e4844fbf355 - has the above bug, but otherwise works properly
.....
5c8e191e8437616a498a8e1cc0af3dd0d32bbff2 - fails early
.....
f4a1c2bce002f683801bcdbbc9fd89804614fb6b - last known working revision


Best regards,
	Maxim Levitsky

PS: .config attached.


[-- Attachment #2: .config --]
[-- Type: text/plain, Size: 48532 bytes --]

#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.23
# Mon Oct 22 07:07:52 2007
#
CONFIG_X86_32=y
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_X86=y
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
CONFIG_QUICKLIST=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_DMI=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"

#
# General setup
#
CONFIG_EXPERIMENTAL=y
CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_LOCALVERSION=""
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
# CONFIG_USER_NS is not set
# CONFIG_AUDIT is not set
# CONFIG_IKCONFIG is not set
CONFIG_LOG_BUF_SHIFT=18
# CONFIG_CPUSETS is not set
CONFIG_FAIR_GROUP_SCHED=y
CONFIG_FAIR_USER_SCHED=y
# CONFIG_SYSFS_DEPRECATED is not set
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SYSCTL=y
CONFIG_EMBEDDED=y
CONFIG_UID16=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_KALLSYMS_EXTRA_PASS is not set
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_ANON_INODES=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_SLUB_DEBUG=y
# CONFIG_SLAB is not set
CONFIG_SLUB=y
# CONFIG_SLOB is not set
CONFIG_RT_MUTEXES=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
# CONFIG_LBD is not set
CONFIG_BLK_DEV_IO_TRACE=y
# CONFIG_LSF is not set
CONFIG_BLK_DEV_BSG=y

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
CONFIG_DEFAULT_AS=y
# CONFIG_DEFAULT_DEADLINE is not set
# CONFIG_DEFAULT_CFQ is not set
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="anticipatory"
CONFIG_PREEMPT_NOTIFIERS=y

#
# Processor type and features
#
CONFIG_TICK_ONESHOT=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_SMP=y
CONFIG_X86_PC=y
# CONFIG_X86_ELAN is not set
# CONFIG_X86_VOYAGER is not set
# CONFIG_X86_NUMAQ is not set
# CONFIG_X86_SUMMIT is not set
# CONFIG_X86_BIGSMP is not set
# CONFIG_X86_VISWS is not set
# CONFIG_X86_GENERICARCH is not set
# CONFIG_X86_ES7000 is not set
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
# CONFIG_PARAVIRT is not set
# CONFIG_M386 is not set
# CONFIG_M486 is not set
# CONFIG_M586 is not set
# CONFIG_M586TSC is not set
# CONFIG_M586MMX is not set
# CONFIG_M686 is not set
# CONFIG_MPENTIUMII is not set
# CONFIG_MPENTIUMIII is not set
# CONFIG_MPENTIUMM is not set
CONFIG_MCORE2=y
# CONFIG_MPENTIUM4 is not set
# CONFIG_MK6 is not set
# CONFIG_MK7 is not set
# CONFIG_MK8 is not set
# CONFIG_MCRUSOE is not set
# CONFIG_MEFFICEON is not set
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
# CONFIG_MGEODEGX1 is not set
# CONFIG_MGEODE_LX is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_MVIAC7 is not set
# CONFIG_X86_GENERIC is not set
CONFIG_X86_CMPXCHG=y
CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_X86_XADD=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_WP_WORKS_OK=y
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_TSC=y
CONFIG_X86_MINIMUM_CPU_FAMILY=4
# CONFIG_HPET_TIMER is not set
CONFIG_NR_CPUS=8
# CONFIG_SCHED_SMT is not set
CONFIG_SCHED_MC=y
# CONFIG_PREEMPT_NONE is not set
# CONFIG_PREEMPT_VOLUNTARY is not set
CONFIG_PREEMPT=y
CONFIG_PREEMPT_BKL=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_X86_MCE=y
CONFIG_X86_MCE_NONFATAL=y
CONFIG_X86_MCE_P4THERMAL=y
CONFIG_VM86=y
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
# CONFIG_X86_REBOOTFIXUPS is not set
CONFIG_MICROCODE=y
CONFIG_MICROCODE_OLD_INTERFACE=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y

#
# Firmware Drivers
#
CONFIG_EDD=y
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
CONFIG_DMIID=y
# CONFIG_NOHIGHMEM is not set
# CONFIG_HIGHMEM4G is not set
CONFIG_HIGHMEM64G=y
CONFIG_VMSPLIT_3G=y
# CONFIG_VMSPLIT_3G_OPT is not set
# CONFIG_VMSPLIT_2G is not set
# CONFIG_VMSPLIT_2G_OPT is not set
# CONFIG_VMSPLIT_1G is not set
CONFIG_PAGE_OFFSET=0xC0000000
CONFIG_HIGHMEM=y
CONFIG_X86_PAE=y
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_ARCH_POPULATES_NODE_MAP=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
# CONFIG_SPARSEMEM_MANUAL is not set
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_SPARSEMEM_STATIC=y
# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_RESOURCES_64BIT=y
CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_NR_QUICK=1
CONFIG_VIRT_TO_BUS=y
CONFIG_HIGHPTE=y
# CONFIG_MATH_EMULATION is not set
CONFIG_MTRR=y
# CONFIG_EFI is not set
# CONFIG_IRQBALANCE is not set
CONFIG_SECCOMP=y
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
# CONFIG_HZ_300 is not set
CONFIG_HZ_1000=y
CONFIG_HZ=1000
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
CONFIG_PHYSICAL_START=0x200000
CONFIG_RELOCATABLE=y
CONFIG_PHYSICAL_ALIGN=0x100000
CONFIG_HOTPLUG_CPU=y
CONFIG_COMPAT_VDSO=y
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y

#
# Power management options (ACPI, APM)
#
CONFIG_PM=y
# CONFIG_PM_LEGACY is not set
# CONFIG_PM_DEBUG is not set
CONFIG_PM_SLEEP_SMP=y
CONFIG_PM_SLEEP=y
CONFIG_SUSPEND_SMP_POSSIBLE=y
CONFIG_SUSPEND=y
CONFIG_HIBERNATION_SMP_POSSIBLE=y
CONFIG_HIBERNATION=y
CONFIG_PM_STD_PARTITION=""
CONFIG_ACPI=y
CONFIG_ACPI_SLEEP=y
# CONFIG_ACPI_PROCFS is not set
# CONFIG_ACPI_PROC_EVENT is not set
# CONFIG_ACPI_AC is not set
# CONFIG_ACPI_BATTERY is not set
CONFIG_ACPI_BUTTON=y
# CONFIG_ACPI_FAN is not set
# CONFIG_ACPI_DOCK is not set
CONFIG_ACPI_PROCESSOR=y
CONFIG_ACPI_HOTPLUG_CPU=y
# CONFIG_ACPI_THERMAL is not set
# CONFIG_ACPI_ASUS is not set
# CONFIG_ACPI_TOSHIBA is not set
# CONFIG_ACPI_CUSTOM_DSDT is not set
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
CONFIG_ACPI_EC=y
CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
CONFIG_X86_PM_TIMER=y
CONFIG_ACPI_CONTAINER=y
# CONFIG_ACPI_SBS is not set
# CONFIG_APM is not set

#
# CPU Frequency scaling
#
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_TABLE=y
# CONFIG_CPU_FREQ_DEBUG is not set
CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_FREQ_STAT_DETAILS=y
CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
# CONFIG_CPU_FREQ_GOV_USERSPACE is not set
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set

#
# CPUFreq processor drivers
#
CONFIG_X86_ACPI_CPUFREQ=m
# CONFIG_X86_POWERNOW_K6 is not set
# CONFIG_X86_POWERNOW_K7 is not set
# CONFIG_X86_POWERNOW_K8 is not set
# CONFIG_X86_GX_SUSPMOD is not set
# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
# CONFIG_X86_SPEEDSTEP_ICH is not set
# CONFIG_X86_SPEEDSTEP_SMI is not set
# CONFIG_X86_P4_CLOCKMOD is not set
# CONFIG_X86_CPUFREQ_NFORCE2 is not set
# CONFIG_X86_LONGRUN is not set
# CONFIG_X86_LONGHAUL is not set
# CONFIG_X86_E_POWERSAVER is not set

#
# shared options
#
# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set
# CONFIG_X86_SPEEDSTEP_LIB is not set

#
# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
#
CONFIG_PCI=y
# CONFIG_PCI_GOBIOS is not set
# CONFIG_PCI_GOMMCONFIG is not set
# CONFIG_PCI_GODIRECT is not set
CONFIG_PCI_GOANY=y
CONFIG_PCI_BIOS=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_MMCONFIG=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCIEPORTBUS=y
# CONFIG_HOTPLUG_PCI_PCIE is not set
CONFIG_PCIEAER=y
CONFIG_ARCH_SUPPORTS_MSI=y
CONFIG_PCI_MSI=y
# CONFIG_PCI_DEBUG is not set
# CONFIG_HT_IRQ is not set
CONFIG_ISA_DMA_API=y
# CONFIG_ISA is not set
# CONFIG_MCA is not set
# CONFIG_SCx200 is not set
# CONFIG_PCCARD is not set
CONFIG_HOTPLUG_PCI=m
CONFIG_HOTPLUG_PCI_FAKE=m
# CONFIG_HOTPLUG_PCI_COMPAQ is not set
# CONFIG_HOTPLUG_PCI_IBM is not set
# CONFIG_HOTPLUG_PCI_ACPI is not set
# CONFIG_HOTPLUG_PCI_CPCI is not set
# CONFIG_HOTPLUG_PCI_SHPC is not set

#
# Executable file formats
#
CONFIG_BINFMT_ELF=y
# CONFIG_BINFMT_AOUT is not set
CONFIG_BINFMT_MISC=y

#
# Networking
#
CONFIG_NET=y

#
# Networking options
#
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
CONFIG_XFRM=y
CONFIG_XFRM_USER=y
# CONFIG_XFRM_SUB_POLICY is not set
# CONFIG_XFRM_MIGRATE is not set
CONFIG_NET_KEY=y
# CONFIG_NET_KEY_MIGRATE is not set
CONFIG_INET=y
# CONFIG_IP_MULTICAST is not set
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_ASK_IP_FIB_HASH=y
# CONFIG_IP_FIB_TRIE is not set
CONFIG_IP_FIB_HASH=y
# CONFIG_IP_MULTIPLE_TABLES is not set
# CONFIG_IP_ROUTE_MULTIPATH is not set
# CONFIG_IP_ROUTE_VERBOSE is not set
# CONFIG_IP_PNP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
# CONFIG_ARPD is not set
# CONFIG_SYN_COOKIES is not set
CONFIG_INET_AH=y
CONFIG_INET_ESP=y
CONFIG_INET_IPCOMP=y
CONFIG_INET_XFRM_TUNNEL=y
CONFIG_INET_TUNNEL=y
CONFIG_INET_XFRM_MODE_TRANSPORT=y
CONFIG_INET_XFRM_MODE_TUNNEL=y
CONFIG_INET_XFRM_MODE_BEET=y
# CONFIG_INET_LRO is not set
CONFIG_INET_DIAG=y
CONFIG_INET_TCP_DIAG=y
# CONFIG_TCP_CONG_ADVANCED is not set
CONFIG_TCP_CONG_CUBIC=y
CONFIG_DEFAULT_TCP_CONG="cubic"
# CONFIG_TCP_MD5SIG is not set
# CONFIG_IPV6 is not set
# CONFIG_INET6_XFRM_TUNNEL is not set
# CONFIG_INET6_TUNNEL is not set
# CONFIG_NETWORK_SECMARK is not set
# CONFIG_NETFILTER is not set
# CONFIG_IP_DCCP is not set
# CONFIG_IP_SCTP is not set
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
CONFIG_LLC=m
# CONFIG_LLC2 is not set
CONFIG_IPX=m
CONFIG_IPX_INTERN=y
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set

#
# QoS and/or fair queueing
#
# CONFIG_NET_SCHED is not set

#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_AF_RXRPC is not set

#
# Wireless
#
# CONFIG_CFG80211 is not set
# CONFIG_WIRELESS_EXT is not set
# CONFIG_MAC80211 is not set
# CONFIG_IEEE80211 is not set
# CONFIG_RFKILL is not set
# CONFIG_NET_9P is not set

#
# Device Drivers
#

#
# Generic Driver Options
#
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
# CONFIG_DEBUG_DRIVER is not set
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_SYS_HYPERVISOR is not set
CONFIG_CONNECTOR=y
CONFIG_PROC_EVENTS=y
CONFIG_MTD=y
# CONFIG_MTD_DEBUG is not set
# CONFIG_MTD_CONCAT is not set
CONFIG_MTD_PARTITIONS=y
# CONFIG_MTD_REDBOOT_PARTS is not set
# CONFIG_MTD_CMDLINE_PARTS is not set

#
# User Modules And Translation Layers
#
CONFIG_MTD_CHAR=y
CONFIG_MTD_BLKDEVS=y
CONFIG_MTD_BLOCK=y
# CONFIG_FTL is not set
# CONFIG_NFTL is not set
# CONFIG_INFTL is not set
# CONFIG_RFD_FTL is not set
# CONFIG_SSFDC is not set
# CONFIG_MTD_OOPS is not set

#
# RAM/ROM/Flash chip drivers
#
# CONFIG_MTD_CFI is not set
# CONFIG_MTD_JEDECPROBE is not set
CONFIG_MTD_MAP_BANK_WIDTH_1=y
CONFIG_MTD_MAP_BANK_WIDTH_2=y
CONFIG_MTD_MAP_BANK_WIDTH_4=y
# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
CONFIG_MTD_CFI_I1=y
CONFIG_MTD_CFI_I2=y
# CONFIG_MTD_CFI_I4 is not set
# CONFIG_MTD_CFI_I8 is not set
# CONFIG_MTD_RAM is not set
# CONFIG_MTD_ROM is not set
# CONFIG_MTD_ABSENT is not set

#
# Mapping drivers for chip access
#
CONFIG_MTD_COMPLEX_MAPPINGS=y
# CONFIG_MTD_TS5500 is not set
# CONFIG_MTD_PCI is not set
# CONFIG_MTD_INTEL_VR_NOR is not set
# CONFIG_MTD_PLATRAM is not set

#
# Self-contained MTD device drivers
#
# CONFIG_MTD_PMC551 is not set
# CONFIG_MTD_SLRAM is not set
CONFIG_MTD_PHRAM=m
# CONFIG_MTD_MTDRAM is not set
# CONFIG_MTD_BLOCK2MTD is not set

#
# Disk-On-Chip Device Drivers
#
# CONFIG_MTD_DOC2000 is not set
# CONFIG_MTD_DOC2001 is not set
# CONFIG_MTD_DOC2001PLUS is not set
# CONFIG_MTD_NAND is not set
# CONFIG_MTD_ONENAND is not set

#
# UBI - Unsorted block images
#
# CONFIG_MTD_UBI is not set
CONFIG_PARPORT=y
CONFIG_PARPORT_PC=y
CONFIG_PARPORT_PC_FIFO=y
# CONFIG_PARPORT_PC_SUPERIO is not set
# CONFIG_PARPORT_GSC is not set
# CONFIG_PARPORT_AX88796 is not set
CONFIG_PARPORT_1284=y
CONFIG_PNP=y
# CONFIG_PNP_DEBUG is not set

#
# Protocols
#
CONFIG_PNPACPI=y
CONFIG_BLK_DEV=y
CONFIG_BLK_DEV_FD=y
# CONFIG_PARIDE is not set
# CONFIG_BLK_CPQ_DA is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
# CONFIG_BLK_DEV_DAC960 is not set
# CONFIG_BLK_DEV_UMEM is not set
# CONFIG_BLK_DEV_COW_COMMON is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_CRYPTOLOOP=y
CONFIG_BLK_DEV_NBD=m
# CONFIG_BLK_DEV_SX8 is not set
# CONFIG_BLK_DEV_UB is not set
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=2
CONFIG_BLK_DEV_RAM_SIZE=65536
CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
CONFIG_CDROM_PKTCDVD=y
CONFIG_CDROM_PKTCDVD_BUFFERS=32
CONFIG_CDROM_PKTCDVD_WCACHE=y
CONFIG_ATA_OVER_ETH=m
CONFIG_MISC_DEVICES=y
# CONFIG_IBM_ASM is not set
# CONFIG_PHANTOM is not set
# CONFIG_EEPROM_93CX6 is not set
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
# CONFIG_MSI_LAPTOP is not set
# CONFIG_SONY_LAPTOP is not set
# CONFIG_THINKPAD_ACPI is not set
# CONFIG_IDE is not set

#
# SCSI device support
#
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
CONFIG_SCSI_DMA=y
# CONFIG_SCSI_TGT is not set
CONFIG_SCSI_NETLINK=y
# CONFIG_SCSI_PROC_FS is not set

#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
# CONFIG_CHR_DEV_OSST is not set
CONFIG_BLK_DEV_SR=y
# CONFIG_BLK_DEV_SR_VENDOR is not set
CONFIG_CHR_DEV_SG=y
# CONFIG_CHR_DEV_SCH is not set

#
# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
#
# CONFIG_SCSI_MULTI_LUN is not set
# CONFIG_SCSI_CONSTANTS is not set
# CONFIG_SCSI_LOGGING is not set
CONFIG_SCSI_SCAN_ASYNC=y
CONFIG_SCSI_WAIT_SCAN=m

#
# SCSI Transports
#
CONFIG_SCSI_SPI_ATTRS=y
CONFIG_SCSI_FC_ATTRS=y
CONFIG_SCSI_ISCSI_ATTRS=y
CONFIG_SCSI_SAS_ATTRS=y
# CONFIG_SCSI_SAS_LIBSAS is not set
# CONFIG_SCSI_SRP_ATTRS is not set
CONFIG_SCSI_LOWLEVEL=y
CONFIG_ISCSI_TCP=m
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AACRAID is not set
# CONFIG_SCSI_AIC7XXX is not set
# CONFIG_SCSI_AIC7XXX_OLD is not set
# CONFIG_SCSI_AIC79XX is not set
# CONFIG_SCSI_AIC94XX is not set
# CONFIG_SCSI_DPT_I2O is not set
# CONFIG_SCSI_ADVANSYS is not set
# CONFIG_SCSI_ARCMSR is not set
# CONFIG_MEGARAID_NEWGEN is not set
# CONFIG_MEGARAID_LEGACY is not set
# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_HPTIOP is not set
CONFIG_SCSI_BUSLOGIC=m
# CONFIG_SCSI_OMIT_FLASHPOINT is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_IPS is not set
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_PPA is not set
# CONFIG_SCSI_IMM is not set
# CONFIG_SCSI_STEX is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
# CONFIG_SCSI_QLA_FC is not set
# CONFIG_SCSI_QLA_ISCSI is not set
# CONFIG_SCSI_LPFC is not set
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_NSP32 is not set
# CONFIG_SCSI_DEBUG is not set
# CONFIG_SCSI_SRP is not set
CONFIG_ATA=y
# CONFIG_ATA_NONSTANDARD is not set
CONFIG_ATA_ACPI=y
CONFIG_SATA_AHCI=m
# CONFIG_SATA_SVW is not set
CONFIG_ATA_PIIX=m
# CONFIG_SATA_MV is not set
# CONFIG_SATA_NV is not set
# CONFIG_PDC_ADMA is not set
# CONFIG_SATA_QSTOR is not set
# CONFIG_SATA_PROMISE is not set
# CONFIG_SATA_SX4 is not set
# CONFIG_SATA_SIL is not set
# CONFIG_SATA_SIL24 is not set
# CONFIG_SATA_SIS is not set
# CONFIG_SATA_ULI is not set
# CONFIG_SATA_VIA is not set
# CONFIG_SATA_VITESSE is not set
# CONFIG_SATA_INIC162X is not set
# CONFIG_PATA_ACPI is not set
# CONFIG_PATA_ALI is not set
# CONFIG_PATA_AMD is not set
# CONFIG_PATA_ARTOP is not set
# CONFIG_PATA_ATIIXP is not set
# CONFIG_PATA_CMD640_PCI is not set
# CONFIG_PATA_CMD64X is not set
# CONFIG_PATA_CS5520 is not set
# CONFIG_PATA_CS5530 is not set
# CONFIG_PATA_CS5535 is not set
# CONFIG_PATA_CS5536 is not set
# CONFIG_PATA_CYPRESS is not set
# CONFIG_PATA_EFAR is not set
# CONFIG_ATA_GENERIC is not set
# CONFIG_PATA_HPT366 is not set
# CONFIG_PATA_HPT37X is not set
# CONFIG_PATA_HPT3X2N is not set
# CONFIG_PATA_HPT3X3 is not set
# CONFIG_PATA_IT821X is not set
# CONFIG_PATA_IT8213 is not set
# CONFIG_PATA_JMICRON is not set
# CONFIG_PATA_TRIFLEX is not set
CONFIG_PATA_MARVELL=m
# CONFIG_PATA_MPIIX is not set
# CONFIG_PATA_OLDPIIX is not set
# CONFIG_PATA_NETCELL is not set
# CONFIG_PATA_NS87410 is not set
# CONFIG_PATA_NS87415 is not set
# CONFIG_PATA_OPTI is not set
# CONFIG_PATA_OPTIDMA is not set
# CONFIG_PATA_PDC_OLD is not set
# CONFIG_PATA_RADISYS is not set
# CONFIG_PATA_RZ1000 is not set
# CONFIG_PATA_SC1200 is not set
# CONFIG_PATA_SERVERWORKS is not set
# CONFIG_PATA_PDC2027X is not set
# CONFIG_PATA_SIL680 is not set
# CONFIG_PATA_SIS is not set
# CONFIG_PATA_VIA is not set
# CONFIG_PATA_WINBOND is not set
# CONFIG_PATA_PLATFORM is not set
CONFIG_MD=y
CONFIG_BLK_DEV_MD=m
CONFIG_MD_LINEAR=m
CONFIG_MD_RAID0=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID10=m
CONFIG_MD_RAID456=m
CONFIG_MD_RAID5_RESHAPE=y
# CONFIG_MD_MULTIPATH is not set
# CONFIG_MD_FAULTY is not set
CONFIG_BLK_DEV_DM=m
# CONFIG_DM_DEBUG is not set
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
CONFIG_DM_MIRROR=m
CONFIG_DM_ZERO=m
# CONFIG_DM_MULTIPATH is not set
# CONFIG_DM_DELAY is not set
CONFIG_FUSION=y
CONFIG_FUSION_SPI=m
CONFIG_FUSION_FC=m
CONFIG_FUSION_SAS=m
CONFIG_FUSION_MAX_SGE=128
# CONFIG_FUSION_CTL is not set
# CONFIG_FUSION_LOGGING is not set

#
# IEEE 1394 (FireWire) support
#
# CONFIG_FIREWIRE is not set
CONFIG_IEEE1394=m

#
# Subsystem Options
#
# CONFIG_IEEE1394_VERBOSEDEBUG is not set

#
# Controllers
#
# CONFIG_IEEE1394_PCILYNX is not set
CONFIG_IEEE1394_OHCI1394=m

#
# Protocols
#
CONFIG_IEEE1394_VIDEO1394=m
CONFIG_IEEE1394_SBP2=m
CONFIG_IEEE1394_SBP2_PHYS_DMA=y
CONFIG_IEEE1394_ETH1394_ROM_ENTRY=y
CONFIG_IEEE1394_ETH1394=m
# CONFIG_IEEE1394_DV1394 is not set
CONFIG_IEEE1394_RAWIO=m
# CONFIG_I2O is not set
# CONFIG_MACINTOSH_DRIVERS is not set
CONFIG_NETDEVICES=y
CONFIG_NETDEVICES_MULTIQUEUE=y
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
# CONFIG_MACVLAN is not set
# CONFIG_EQUALIZER is not set
CONFIG_TUN=y
# CONFIG_VETH is not set
# CONFIG_NET_SB1000 is not set
# CONFIG_IP1000 is not set
# CONFIG_ARCNET is not set
# CONFIG_PHYLIB is not set
CONFIG_NET_ETHERNET=y
CONFIG_MII=y
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
# CONFIG_CASSINI is not set
# CONFIG_NET_VENDOR_3COM is not set
CONFIG_NET_TULIP=y
# CONFIG_DE2104X is not set
# CONFIG_TULIP is not set
# CONFIG_DE4X5 is not set
# CONFIG_WINBOND_840 is not set
CONFIG_DM9102=m
# CONFIG_ULI526X is not set
# CONFIG_HP100 is not set
# CONFIG_IBM_NEW_EMAC_ZMII is not set
# CONFIG_IBM_NEW_EMAC_RGMII is not set
# CONFIG_IBM_NEW_EMAC_TAH is not set
# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
CONFIG_NET_PCI=y
CONFIG_PCNET32=m
CONFIG_PCNET32_NAPI=y
# CONFIG_AMD8111_ETH is not set
# CONFIG_ADAPTEC_STARFIRE is not set
# CONFIG_B44 is not set
# CONFIG_FORCEDETH is not set
# CONFIG_EEPRO100 is not set
# CONFIG_E100 is not set
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
# CONFIG_NE2K_PCI is not set
# CONFIG_8139CP is not set
# CONFIG_8139TOO is not set
# CONFIG_SIS900 is not set
# CONFIG_EPIC100 is not set
# CONFIG_SUNDANCE is not set
# CONFIG_TLAN is not set
# CONFIG_VIA_RHINE is not set
# CONFIG_SC92031 is not set
# CONFIG_NET_POCKET is not set
CONFIG_NETDEV_1000=y
# CONFIG_ACENIC is not set
# CONFIG_DL2K is not set
CONFIG_E1000=m
# CONFIG_E1000_NAPI is not set
# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
# CONFIG_E1000E is not set
# CONFIG_NS83820 is not set
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
# CONFIG_R8169 is not set
# CONFIG_SIS190 is not set
# CONFIG_SKGE is not set
# CONFIG_SKY2 is not set
CONFIG_SK98LIN=y
# CONFIG_VIA_VELOCITY is not set
# CONFIG_TIGON3 is not set
# CONFIG_BNX2 is not set
# CONFIG_QLA3XXX is not set
# CONFIG_ATL1 is not set
# CONFIG_NETDEV_10000 is not set
# CONFIG_TR is not set

#
# Wireless LAN
#
# CONFIG_WLAN_PRE80211 is not set
# CONFIG_WLAN_80211 is not set

#
# USB Network Adapters
#
# CONFIG_USB_CATC is not set
# CONFIG_USB_KAWETH is not set
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET_MII is not set
# CONFIG_USB_USBNET is not set
# CONFIG_WAN is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
# CONFIG_PLIP is not set
# CONFIG_PPP is not set
# CONFIG_SLIP is not set
# CONFIG_NET_FC is not set
# CONFIG_SHAPER is not set
CONFIG_NETCONSOLE=m
# CONFIG_NETCONSOLE_DYNAMIC is not set
CONFIG_NETPOLL=y
CONFIG_NETPOLL_TRAP=y
CONFIG_NET_POLL_CONTROLLER=y
# CONFIG_ISDN is not set
# CONFIG_PHONE is not set

#
# Input device support
#
CONFIG_INPUT=y
CONFIG_INPUT_FF_MEMLESS=y
# CONFIG_INPUT_POLLDEV is not set

#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
CONFIG_INPUT_JOYDEV=y
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_EVBUG is not set

#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
CONFIG_KEYBOARD_ATKBD=y
# CONFIG_KEYBOARD_SUNKBD is not set
# CONFIG_KEYBOARD_LKKBD is not set
# CONFIG_KEYBOARD_XTKBD is not set
# CONFIG_KEYBOARD_NEWTON is not set
# CONFIG_KEYBOARD_STOWAWAY is not set
CONFIG_INPUT_MOUSE=y
CONFIG_MOUSE_PS2=y
CONFIG_MOUSE_PS2_ALPS=y
CONFIG_MOUSE_PS2_LOGIPS2PP=y
CONFIG_MOUSE_PS2_SYNAPTICS=y
CONFIG_MOUSE_PS2_LIFEBOOK=y
CONFIG_MOUSE_PS2_TRACKPOINT=y
# CONFIG_MOUSE_PS2_TOUCHKIT is not set
CONFIG_MOUSE_SERIAL=m
# CONFIG_MOUSE_APPLETOUCH is not set
# CONFIG_MOUSE_VSXXXAA is not set
CONFIG_INPUT_JOYSTICK=y
# CONFIG_JOYSTICK_ANALOG is not set
# CONFIG_JOYSTICK_A3D is not set
# CONFIG_JOYSTICK_ADI is not set
# CONFIG_JOYSTICK_COBRA is not set
# CONFIG_JOYSTICK_GF2K is not set
# CONFIG_JOYSTICK_GRIP is not set
# CONFIG_JOYSTICK_GRIP_MP is not set
# CONFIG_JOYSTICK_GUILLEMOT is not set
# CONFIG_JOYSTICK_INTERACT is not set
# CONFIG_JOYSTICK_SIDEWINDER is not set
# CONFIG_JOYSTICK_TMDC is not set
CONFIG_JOYSTICK_IFORCE=m
CONFIG_JOYSTICK_IFORCE_USB=y
# CONFIG_JOYSTICK_IFORCE_232 is not set
# CONFIG_JOYSTICK_WARRIOR is not set
# CONFIG_JOYSTICK_MAGELLAN is not set
# CONFIG_JOYSTICK_SPACEORB is not set
# CONFIG_JOYSTICK_SPACEBALL is not set
# CONFIG_JOYSTICK_STINGER is not set
# CONFIG_JOYSTICK_TWIDJOY is not set
# CONFIG_JOYSTICK_DB9 is not set
# CONFIG_JOYSTICK_GAMECON is not set
# CONFIG_JOYSTICK_TURBOGRAFX is not set
# CONFIG_JOYSTICK_JOYDUMP is not set
# CONFIG_JOYSTICK_XPAD is not set
# CONFIG_INPUT_TABLET is not set
# CONFIG_INPUT_TOUCHSCREEN is not set
CONFIG_INPUT_MISC=y
CONFIG_INPUT_PCSPKR=m
# CONFIG_INPUT_WISTRON_BTNS is not set
# CONFIG_INPUT_ATLAS_BTNS is not set
# CONFIG_INPUT_ATI_REMOTE is not set
# CONFIG_INPUT_ATI_REMOTE2 is not set
# CONFIG_INPUT_KEYSPAN_REMOTE is not set
# CONFIG_INPUT_POWERMATE is not set
# CONFIG_INPUT_YEALINK is not set
# CONFIG_INPUT_UINPUT is not set

#
# Hardware I/O ports
#
CONFIG_SERIO=y
CONFIG_SERIO_I8042=y
CONFIG_SERIO_SERPORT=m
# CONFIG_SERIO_CT82C710 is not set
CONFIG_SERIO_PARKBD=m
# CONFIG_SERIO_PCIPS2 is not set
CONFIG_SERIO_LIBPS2=y
CONFIG_SERIO_RAW=m
CONFIG_GAMEPORT=y
CONFIG_GAMEPORT_NS558=y
# CONFIG_GAMEPORT_L4 is not set
# CONFIG_GAMEPORT_EMU10K1 is not set
# CONFIG_GAMEPORT_FM801 is not set

#
# Character devices
#
CONFIG_VT=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
CONFIG_VT_HW_CONSOLE_BINDING=y
# CONFIG_SERIAL_NONSTANDARD is not set

#
# Serial drivers
#
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_FIX_EARLYCON_MEM=y
# CONFIG_SERIAL_8250_PCI is not set
CONFIG_SERIAL_8250_PNP=y
CONFIG_SERIAL_8250_NR_UARTS=2
CONFIG_SERIAL_8250_RUNTIME_UARTS=2
# CONFIG_SERIAL_8250_EXTENDED is not set

#
# Non-8250 serial port support
#
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=3
CONFIG_PRINTER=m
CONFIG_LP_CONSOLE=y
CONFIG_PPDEV=m
# CONFIG_TIPAR is not set
# CONFIG_IPMI_HANDLER is not set
CONFIG_WATCHDOG=y
# CONFIG_WATCHDOG_NOWAYOUT is not set

#
# Watchdog Device Drivers
#
# CONFIG_SOFT_WATCHDOG is not set
# CONFIG_ACQUIRE_WDT is not set
# CONFIG_ADVANTECH_WDT is not set
# CONFIG_ALIM1535_WDT is not set
# CONFIG_ALIM7101_WDT is not set
# CONFIG_SC520_WDT is not set
# CONFIG_EUROTECH_WDT is not set
# CONFIG_IB700_WDT is not set
# CONFIG_IBMASR is not set
# CONFIG_WAFER_WDT is not set
# CONFIG_I6300ESB_WDT is not set
CONFIG_ITCO_WDT=m
# CONFIG_ITCO_VENDOR_SUPPORT is not set
# CONFIG_SC1200_WDT is not set
# CONFIG_PC87413_WDT is not set
# CONFIG_60XX_WDT is not set
# CONFIG_SBC8360_WDT is not set
# CONFIG_CPU5_WDT is not set
# CONFIG_SMSC37B787_WDT is not set
# CONFIG_W83627HF_WDT is not set
# CONFIG_W83697HF_WDT is not set
# CONFIG_W83877F_WDT is not set
# CONFIG_W83977F_WDT is not set
# CONFIG_MACHZ_WDT is not set
# CONFIG_SBC_EPX_C3_WATCHDOG is not set

#
# PCI-based Watchdog Cards
#
# CONFIG_PCIPCWATCHDOG is not set
# CONFIG_WDTPCI is not set

#
# USB-based Watchdog Cards
#
# CONFIG_USBPCWATCHDOG is not set
# CONFIG_HW_RANDOM is not set
CONFIG_NVRAM=y
# CONFIG_RTC is not set
# CONFIG_GEN_RTC is not set
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
# CONFIG_SONYPI is not set
# CONFIG_MWAVE is not set
# CONFIG_PC8736x_GPIO is not set
# CONFIG_NSC_GPIO is not set
# CONFIG_CS5535_GPIO is not set
# CONFIG_RAW_DRIVER is not set
# CONFIG_HPET is not set
# CONFIG_HANGCHECK_TIMER is not set
# CONFIG_TCG_TPM is not set
# CONFIG_TELCLOCK is not set
CONFIG_DEVPORT=y
CONFIG_I2C=y
CONFIG_I2C_BOARDINFO=y
CONFIG_I2C_CHARDEV=y

#
# I2C Algorithms
#
CONFIG_I2C_ALGOBIT=y
# CONFIG_I2C_ALGOPCF is not set
# CONFIG_I2C_ALGOPCA is not set

#
# I2C Hardware Bus support
#
# CONFIG_I2C_ALI1535 is not set
# CONFIG_I2C_ALI1563 is not set
# CONFIG_I2C_ALI15X3 is not set
# CONFIG_I2C_AMD756 is not set
# CONFIG_I2C_AMD8111 is not set
CONFIG_I2C_I801=m
# CONFIG_I2C_I810 is not set
CONFIG_I2C_PIIX4=m
# CONFIG_I2C_NFORCE2 is not set
# CONFIG_I2C_OCORES is not set
# CONFIG_I2C_PARPORT is not set
# CONFIG_I2C_PARPORT_LIGHT is not set
# CONFIG_I2C_PROSAVAGE is not set
# CONFIG_I2C_SAVAGE4 is not set
# CONFIG_I2C_SIMTEC is not set
# CONFIG_SCx200_ACB is not set
# CONFIG_I2C_SIS5595 is not set
# CONFIG_I2C_SIS630 is not set
# CONFIG_I2C_SIS96X is not set
# CONFIG_I2C_TAOS_EVM is not set
# CONFIG_I2C_STUB is not set
# CONFIG_I2C_TINY_USB is not set
# CONFIG_I2C_VIA is not set
# CONFIG_I2C_VIAPRO is not set
# CONFIG_I2C_VOODOO3 is not set

#
# Miscellaneous I2C Chip support
#
# CONFIG_SENSORS_DS1337 is not set
# CONFIG_SENSORS_DS1374 is not set
# CONFIG_DS1682 is not set
CONFIG_SENSORS_EEPROM=m
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_SENSORS_PCA9539 is not set
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_MAX6875 is not set
# CONFIG_SENSORS_TSL2550 is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# CONFIG_I2C_DEBUG_CHIP is not set

#
# SPI support
#
# CONFIG_SPI is not set
# CONFIG_SPI_MASTER is not set
# CONFIG_W1 is not set
# CONFIG_POWER_SUPPLY is not set
CONFIG_HWMON=y
# CONFIG_HWMON_VID is not set
# CONFIG_SENSORS_ABITUGURU is not set
# CONFIG_SENSORS_ABITUGURU3 is not set
# CONFIG_SENSORS_AD7418 is not set
# CONFIG_SENSORS_ADM1021 is not set
# CONFIG_SENSORS_ADM1025 is not set
# CONFIG_SENSORS_ADM1026 is not set
# CONFIG_SENSORS_ADM1029 is not set
# CONFIG_SENSORS_ADM1031 is not set
# CONFIG_SENSORS_ADM9240 is not set
# CONFIG_SENSORS_ADT7470 is not set
# CONFIG_SENSORS_K8TEMP is not set
# CONFIG_SENSORS_ASB100 is not set
# CONFIG_SENSORS_ATXP1 is not set
# CONFIG_SENSORS_DS1621 is not set
# CONFIG_SENSORS_F71805F is not set
# CONFIG_SENSORS_F71882FG is not set
# CONFIG_SENSORS_F75375S is not set
# CONFIG_SENSORS_FSCHER is not set
# CONFIG_SENSORS_FSCPOS is not set
# CONFIG_SENSORS_FSCHMD is not set
# CONFIG_SENSORS_GL518SM is not set
# CONFIG_SENSORS_GL520SM is not set
CONFIG_SENSORS_CORETEMP=m
# CONFIG_SENSORS_IT87 is not set
# CONFIG_SENSORS_LM63 is not set
# CONFIG_SENSORS_LM75 is not set
# CONFIG_SENSORS_LM77 is not set
# CONFIG_SENSORS_LM78 is not set
# CONFIG_SENSORS_LM80 is not set
# CONFIG_SENSORS_LM83 is not set
# CONFIG_SENSORS_LM85 is not set
# CONFIG_SENSORS_LM87 is not set
# CONFIG_SENSORS_LM90 is not set
# CONFIG_SENSORS_LM92 is not set
# CONFIG_SENSORS_LM93 is not set
# CONFIG_SENSORS_MAX1619 is not set
# CONFIG_SENSORS_MAX6650 is not set
# CONFIG_SENSORS_PC87360 is not set
# CONFIG_SENSORS_PC87427 is not set
# CONFIG_SENSORS_SIS5595 is not set
# CONFIG_SENSORS_DME1737 is not set
# CONFIG_SENSORS_SMSC47M1 is not set
# CONFIG_SENSORS_SMSC47M192 is not set
# CONFIG_SENSORS_SMSC47B397 is not set
# CONFIG_SENSORS_THMC50 is not set
# CONFIG_SENSORS_VIA686A is not set
# CONFIG_SENSORS_VT1211 is not set
# CONFIG_SENSORS_VT8231 is not set
# CONFIG_SENSORS_W83781D is not set
# CONFIG_SENSORS_W83791D is not set
# CONFIG_SENSORS_W83792D is not set
# CONFIG_SENSORS_W83793 is not set
# CONFIG_SENSORS_W83L785TS is not set
# CONFIG_SENSORS_W83627HF is not set
# CONFIG_SENSORS_W83627EHF is not set
# CONFIG_SENSORS_HDAPS is not set
# CONFIG_SENSORS_APPLESMC is not set
# CONFIG_HWMON_DEBUG_CHIP is not set

#
# Sonics Silicon Backplane
#
CONFIG_SSB_POSSIBLE=y
# CONFIG_SSB is not set

#
# Multifunction device drivers
#
# CONFIG_MFD_SM501 is not set

#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
# CONFIG_DVB_CORE is not set
# CONFIG_DAB is not set

#
# Graphics support
#
CONFIG_AGP=y
# CONFIG_AGP_ALI is not set
# CONFIG_AGP_ATI is not set
# CONFIG_AGP_AMD is not set
# CONFIG_AGP_AMD64 is not set
CONFIG_AGP_INTEL=m
# CONFIG_AGP_NVIDIA is not set
# CONFIG_AGP_SIS is not set
# CONFIG_AGP_SWORKS is not set
# CONFIG_AGP_VIA is not set
# CONFIG_AGP_EFFICEON is not set
CONFIG_DRM=y
# CONFIG_DRM_TDFX is not set
# CONFIG_DRM_R128 is not set
# CONFIG_DRM_RADEON is not set
# CONFIG_DRM_I810 is not set
# CONFIG_DRM_I830 is not set
CONFIG_DRM_I915=m
# CONFIG_DRM_MGA is not set
# CONFIG_DRM_SIS is not set
# CONFIG_DRM_VIA is not set
# CONFIG_DRM_SAVAGE is not set
CONFIG_VGASTATE=m
# CONFIG_VIDEO_OUTPUT_CONTROL is not set
CONFIG_FB=y
# CONFIG_FIRMWARE_EDID is not set
# CONFIG_FB_DDC is not set
CONFIG_FB_CFB_FILLRECT=y
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
# CONFIG_FB_SYS_FILLRECT is not set
# CONFIG_FB_SYS_COPYAREA is not set
# CONFIG_FB_SYS_IMAGEBLIT is not set
# CONFIG_FB_SYS_FOPS is not set
CONFIG_FB_DEFERRED_IO=y
CONFIG_FB_SVGALIB=m
# CONFIG_FB_MACMODES is not set
# CONFIG_FB_BACKLIGHT is not set
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y

#
# Frame buffer hardware drivers
#
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_PM2 is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_ARC is not set
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_VGA16 is not set
# CONFIG_FB_UVESA is not set
CONFIG_FB_VESA=y
# CONFIG_FB_HECUBA is not set
# CONFIG_FB_HGA is not set
# CONFIG_FB_S1D13XXX is not set
# CONFIG_FB_NVIDIA is not set
# CONFIG_FB_RIVA is not set
# CONFIG_FB_I810 is not set
# CONFIG_FB_LE80578 is not set
# CONFIG_FB_INTEL is not set
# CONFIG_FB_MATROX is not set
# CONFIG_FB_RADEON is not set
# CONFIG_FB_ATY128 is not set
# CONFIG_FB_ATY is not set
CONFIG_FB_S3=m
# CONFIG_FB_SAVAGE is not set
# CONFIG_FB_SIS is not set
# CONFIG_FB_NEOMAGIC is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_3DFX is not set
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_VT8623 is not set
# CONFIG_FB_CYBLA is not set
# CONFIG_FB_TRIDENT is not set
# CONFIG_FB_ARK is not set
# CONFIG_FB_PM3 is not set
# CONFIG_FB_GEODE is not set
# CONFIG_FB_VIRTUAL is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
CONFIG_LCD_CLASS_DEVICE=y
CONFIG_BACKLIGHT_CLASS_DEVICE=y
# CONFIG_BACKLIGHT_CORGI is not set
# CONFIG_BACKLIGHT_PROGEAR is not set

#
# Display device support
#
# CONFIG_DISPLAY_SUPPORT is not set

#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
# CONFIG_VGACON_SOFT_SCROLLBACK is not set
CONFIG_VIDEO_SELECT=y
CONFIG_DUMMY_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
# CONFIG_FONTS is not set
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_LOGO_LINUX_CLUT224=y

#
# Sound
#
CONFIG_SOUND=y

#
# Advanced Linux Sound Architecture
#
CONFIG_SND=m
CONFIG_SND_TIMER=m
CONFIG_SND_PCM=m
CONFIG_SND_HWDEP=m
CONFIG_SND_RAWMIDI=m
CONFIG_SND_SEQUENCER=m
# CONFIG_SND_SEQ_DUMMY is not set
CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
CONFIG_SND_PCM_OSS_PLUGINS=y
CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_DYNAMIC_MINORS=y
# CONFIG_SND_SUPPORT_OLD_API is not set
CONFIG_SND_VERBOSE_PROCFS=y
# CONFIG_SND_VERBOSE_PRINTK is not set
# CONFIG_SND_DEBUG is not set

#
# Generic devices
#
# CONFIG_SND_DUMMY is not set
# CONFIG_SND_VIRMIDI is not set
# CONFIG_SND_MTPAV is not set
# CONFIG_SND_MTS64 is not set
# CONFIG_SND_SERIAL_U16550 is not set
# CONFIG_SND_MPU401 is not set
# CONFIG_SND_PORTMAN2X4 is not set

#
# PCI devices
#
# CONFIG_SND_AD1889 is not set
# CONFIG_SND_ALS300 is not set
# CONFIG_SND_ALS4000 is not set
# CONFIG_SND_ALI5451 is not set
# CONFIG_SND_ATIIXP is not set
# CONFIG_SND_ATIIXP_MODEM is not set
# CONFIG_SND_AU8810 is not set
# CONFIG_SND_AU8820 is not set
# CONFIG_SND_AU8830 is not set
# CONFIG_SND_AZT3328 is not set
# CONFIG_SND_BT87X is not set
# CONFIG_SND_CA0106 is not set
# CONFIG_SND_CMIPCI is not set
# CONFIG_SND_CS4281 is not set
# CONFIG_SND_CS46XX is not set
# CONFIG_SND_CS5530 is not set
# CONFIG_SND_CS5535AUDIO is not set
# CONFIG_SND_DARLA20 is not set
# CONFIG_SND_GINA20 is not set
# CONFIG_SND_LAYLA20 is not set
# CONFIG_SND_DARLA24 is not set
# CONFIG_SND_GINA24 is not set
# CONFIG_SND_LAYLA24 is not set
# CONFIG_SND_MONA is not set
# CONFIG_SND_MIA is not set
# CONFIG_SND_ECHO3G is not set
# CONFIG_SND_INDIGO is not set
# CONFIG_SND_INDIGOIO is not set
# CONFIG_SND_INDIGODJ is not set
# CONFIG_SND_EMU10K1 is not set
# CONFIG_SND_EMU10K1X is not set
# CONFIG_SND_ENS1370 is not set
# CONFIG_SND_ENS1371 is not set
# CONFIG_SND_ES1938 is not set
# CONFIG_SND_ES1968 is not set
# CONFIG_SND_FM801 is not set
CONFIG_SND_HDA_INTEL=m
# CONFIG_SND_HDA_HWDEP is not set
CONFIG_SND_HDA_CODEC_REALTEK=y
CONFIG_SND_HDA_CODEC_ANALOG=y
CONFIG_SND_HDA_CODEC_SIGMATEL=y
CONFIG_SND_HDA_CODEC_VIA=y
CONFIG_SND_HDA_CODEC_ATIHDMI=y
CONFIG_SND_HDA_CODEC_CONEXANT=y
CONFIG_SND_HDA_CODEC_CMEDIA=y
CONFIG_SND_HDA_CODEC_SI3054=y
CONFIG_SND_HDA_GENERIC=y
# CONFIG_SND_HDA_POWER_SAVE is not set
# CONFIG_SND_HDSP is not set
# CONFIG_SND_HDSPM is not set
# CONFIG_SND_ICE1712 is not set
# CONFIG_SND_ICE1724 is not set
# CONFIG_SND_INTEL8X0 is not set
# CONFIG_SND_INTEL8X0M is not set
# CONFIG_SND_KORG1212 is not set
# CONFIG_SND_MAESTRO3 is not set
# CONFIG_SND_MIXART is not set
# CONFIG_SND_NM256 is not set
# CONFIG_SND_PCXHR is not set
# CONFIG_SND_RIPTIDE is not set
# CONFIG_SND_RME32 is not set
# CONFIG_SND_RME96 is not set
# CONFIG_SND_RME9652 is not set
# CONFIG_SND_SONICVIBES is not set
# CONFIG_SND_TRIDENT is not set
# CONFIG_SND_VIA82XX is not set
# CONFIG_SND_VIA82XX_MODEM is not set
# CONFIG_SND_VX222 is not set
# CONFIG_SND_YMFPCI is not set

#
# USB devices
#
CONFIG_SND_USB_AUDIO=m
# CONFIG_SND_USB_USX2Y is not set
# CONFIG_SND_USB_CAIAQ is not set

#
# System on Chip audio support
#
# CONFIG_SND_SOC is not set

#
# SoC Audio support for SuperH
#

#
# Open Sound System
#
CONFIG_SOUND_PRIME=m
# CONFIG_SOUND_TRIDENT is not set
# CONFIG_SOUND_MSNDCLAS is not set
# CONFIG_SOUND_MSNDPIN is not set
# CONFIG_SOUND_OSS is not set
CONFIG_HID_SUPPORT=y
CONFIG_HID=y
# CONFIG_HID_DEBUG is not set
# CONFIG_HIDRAW is not set

#
# USB Input Devices
#
CONFIG_USB_HID=y
# CONFIG_USB_HIDINPUT_POWERBOOK is not set
CONFIG_HID_FF=y
CONFIG_HID_PID=y
CONFIG_LOGITECH_FF=y
CONFIG_PANTHERLORD_FF=y
CONFIG_THRUSTMASTER_FF=y
CONFIG_ZEROPLUS_FF=y
CONFIG_USB_HIDDEV=y
CONFIG_USB_SUPPORT=y
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
CONFIG_USB_ARCH_HAS_EHCI=y
CONFIG_USB=y
# CONFIG_USB_DEBUG is not set

#
# Miscellaneous USB options
#
CONFIG_USB_DEVICEFS=y
# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_DYNAMIC_MINORS=y
CONFIG_USB_SUSPEND=y
CONFIG_USB_PERSIST=y
# CONFIG_USB_OTG is not set

#
# USB Host Controller Drivers
#
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_SPLIT_ISO=y
CONFIG_USB_EHCI_ROOT_HUB_TT=y
CONFIG_USB_EHCI_TT_NEWSCHED=y
# CONFIG_USB_ISP116X_HCD is not set
# CONFIG_USB_OHCI_HCD is not set
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
# CONFIG_USB_R8A66597_HCD is not set

#
# USB Device Class drivers
#
# CONFIG_USB_ACM is not set
CONFIG_USB_PRINTER=m

#
# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
#

#
# may also be needed; see USB_STORAGE Help for more information
#
CONFIG_USB_STORAGE=m
# CONFIG_USB_STORAGE_DEBUG is not set
CONFIG_USB_STORAGE_DATAFAB=y
CONFIG_USB_STORAGE_FREECOM=y
CONFIG_USB_STORAGE_DPCM=y
CONFIG_USB_STORAGE_USBAT=y
CONFIG_USB_STORAGE_SDDR09=y
CONFIG_USB_STORAGE_SDDR55=y
CONFIG_USB_STORAGE_JUMPSHOT=y
CONFIG_USB_STORAGE_ALAUDA=y
CONFIG_USB_STORAGE_KARMA=y
# CONFIG_USB_LIBUSUAL is not set

#
# USB Imaging devices
#
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set
CONFIG_USB_MON=y

#
# USB port drivers
#
# CONFIG_USB_USS720 is not set

#
# USB Serial Converter support
#
# CONFIG_USB_SERIAL is not set

#
# USB Miscellaneous drivers
#
# CONFIG_USB_EMI62 is not set
# CONFIG_USB_EMI26 is not set
# CONFIG_USB_ADUTUX is not set
# CONFIG_USB_AUERSWALD is not set
# CONFIG_USB_RIO500 is not set
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
# CONFIG_USB_BERRY_CHARGE is not set
# CONFIG_USB_LED is not set
# CONFIG_USB_CYPRESS_CY7C63 is not set
# CONFIG_USB_CYTHERM is not set
# CONFIG_USB_PHIDGET is not set
# CONFIG_USB_IDMOUSE is not set
# CONFIG_USB_FTDI_ELAN is not set
# CONFIG_USB_APPLEDISPLAY is not set
# CONFIG_USB_SISUSBVGA is not set
# CONFIG_USB_LD is not set
# CONFIG_USB_TRANCEVIBRATOR is not set
# CONFIG_USB_IOWARRIOR is not set
# CONFIG_USB_TEST is not set

#
# USB DSL modem support
#

#
# USB Gadget Support
#
# CONFIG_USB_GADGET is not set
# CONFIG_MMC is not set
# CONFIG_NEW_LEDS is not set
# CONFIG_INFINIBAND is not set
# CONFIG_EDAC is not set
CONFIG_RTC_LIB=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_HCTOSYS=y
CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
# CONFIG_RTC_DEBUG is not set

#
# RTC interfaces
#
CONFIG_RTC_INTF_SYSFS=y
CONFIG_RTC_INTF_PROC=y
CONFIG_RTC_INTF_DEV=y
# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
# CONFIG_RTC_DRV_TEST is not set

#
# I2C RTC drivers
#
# CONFIG_RTC_DRV_DS1307 is not set
# CONFIG_RTC_DRV_DS1374 is not set
# CONFIG_RTC_DRV_DS1672 is not set
# CONFIG_RTC_DRV_MAX6900 is not set
# CONFIG_RTC_DRV_RS5C372 is not set
# CONFIG_RTC_DRV_ISL1208 is not set
# CONFIG_RTC_DRV_X1205 is not set
# CONFIG_RTC_DRV_PCF8563 is not set
# CONFIG_RTC_DRV_PCF8583 is not set
# CONFIG_RTC_DRV_M41T80 is not set

#
# SPI RTC drivers
#

#
# Platform RTC drivers
#
CONFIG_RTC_DRV_CMOS=y
# CONFIG_RTC_DRV_DS1553 is not set
# CONFIG_RTC_DRV_STK17TA8 is not set
# CONFIG_RTC_DRV_DS1742 is not set
# CONFIG_RTC_DRV_M48T86 is not set
# CONFIG_RTC_DRV_M48T59 is not set
# CONFIG_RTC_DRV_V3020 is not set

#
# on-CPU RTC drivers
#
# CONFIG_DMADEVICES is not set
# CONFIG_AUXDISPLAY is not set
CONFIG_VIRTUALIZATION=y
CONFIG_KVM=m
CONFIG_KVM_INTEL=m
# CONFIG_KVM_AMD is not set

#
# Userspace I/O
#
# CONFIG_UIO is not set

#
# File systems
#
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
CONFIG_EXT2_FS_XIP=y
CONFIG_FS_XIP=y
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_XATTR=y
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
# CONFIG_EXT4DEV_FS is not set
CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set
CONFIG_FS_MBCACHE=y
CONFIG_REISERFS_FS=m
# CONFIG_REISERFS_CHECK is not set
CONFIG_REISERFS_PROC_INFO=y
CONFIG_REISERFS_FS_XATTR=y
CONFIG_REISERFS_FS_POSIX_ACL=y
CONFIG_REISERFS_FS_SECURITY=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
# CONFIG_JFS_DEBUG is not set
CONFIG_JFS_STATISTICS=y
CONFIG_FS_POSIX_ACL=y
CONFIG_XFS_FS=m
CONFIG_XFS_QUOTA=y
CONFIG_XFS_SECURITY=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_MINIX_FS is not set
CONFIG_ROMFS_FS=m
CONFIG_INOTIFY=y
CONFIG_INOTIFY_USER=y
CONFIG_QUOTA=y
# CONFIG_QUOTA_NETLINK_INTERFACE is not set
CONFIG_PRINT_QUOTA_WARNING=y
# CONFIG_QFMT_V1 is not set
CONFIG_QFMT_V2=y
CONFIG_QUOTACTL=y
CONFIG_DNOTIFY=y
CONFIG_AUTOFS_FS=m
CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=m
CONFIG_GENERIC_ACL=y

#
# CD-ROM/DVD Filesystems
#
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_UDF_NLS=y

#
# DOS/FAT/NT Filesystems
#
CONFIG_FAT_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_FAT_DEFAULT_CODEPAGE=866
CONFIG_FAT_DEFAULT_IOCHARSET="utf8"
CONFIG_NTFS_FS=m
CONFIG_NTFS_DEBUG=y
CONFIG_NTFS_RW=y

#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_PROC_VMCORE=y
CONFIG_PROC_SYSCTL=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
CONFIG_CONFIGFS_FS=y

#
# Miscellaneous filesystems
#
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_JFFS2_FS is not set
CONFIG_CRAMFS=m
# CONFIG_VXFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
# CONFIG_NFS_FS is not set
# CONFIG_NFSD is not set
CONFIG_SMB_FS=y
CONFIG_SMB_NLS_DEFAULT=y
CONFIG_SMB_NLS_REMOTE="y"
CONFIG_CIFS=y
CONFIG_CIFS_STATS=y
CONFIG_CIFS_STATS2=y
CONFIG_CIFS_WEAK_PW_HASH=y
CONFIG_CIFS_XATTR=y
CONFIG_CIFS_POSIX=y
CONFIG_CIFS_DEBUG2=y
CONFIG_CIFS_EXPERIMENTAL=y
CONFIG_CIFS_UPCALL=y
CONFIG_NCP_FS=y
# CONFIG_NCPFS_PACKET_SIGNING is not set
# CONFIG_NCPFS_IOCTL_LOCKING is not set
# CONFIG_NCPFS_STRONG is not set
# CONFIG_NCPFS_NFS_NS is not set
# CONFIG_NCPFS_OS2_NS is not set
# CONFIG_NCPFS_SMALLDOS is not set
# CONFIG_NCPFS_NLS is not set
# CONFIG_NCPFS_EXTRAS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set

#
# Partition Types
#
CONFIG_PARTITION_ADVANCED=y
# CONFIG_ACORN_PARTITION is not set
# CONFIG_OSF_PARTITION is not set
# CONFIG_AMIGA_PARTITION is not set
# CONFIG_ATARI_PARTITION is not set
# CONFIG_MAC_PARTITION is not set
CONFIG_MSDOS_PARTITION=y
# CONFIG_BSD_DISKLABEL is not set
# CONFIG_MINIX_SUBPARTITION is not set
# CONFIG_SOLARIS_X86_PARTITION is not set
# CONFIG_UNIXWARE_DISKLABEL is not set
CONFIG_LDM_PARTITION=y
CONFIG_LDM_DEBUG=y
# CONFIG_SGI_PARTITION is not set
# CONFIG_ULTRIX_PARTITION is not set
# CONFIG_SUN_PARTITION is not set
# CONFIG_KARMA_PARTITION is not set
CONFIG_EFI_PARTITION=y
# CONFIG_SYSV68_PARTITION is not set
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="cp437"
CONFIG_NLS_CODEPAGE_437=y
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
CONFIG_NLS_CODEPAGE_850=y
CONFIG_NLS_CODEPAGE_852=y
CONFIG_NLS_CODEPAGE_855=y
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
CONFIG_NLS_CODEPAGE_862=y
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
CONFIG_NLS_CODEPAGE_866=y
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
CONFIG_NLS_ISO8859_8=y
CONFIG_NLS_CODEPAGE_1250=y
# CONFIG_NLS_CODEPAGE_1251 is not set
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_ISO8859_2=y
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
CONFIG_NLS_ISO8859_5=y
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
CONFIG_NLS_KOI8_R=y
# CONFIG_NLS_KOI8_U is not set
CONFIG_NLS_UTF8=y
# CONFIG_DLM is not set
# CONFIG_INSTRUMENTATION is not set

#
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
CONFIG_PRINTK_TIME=y
CONFIG_ENABLE_MUST_CHECK=y
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_FS=y
# CONFIG_HEADERS_CHECK is not set
CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_SHIRQ is not set
CONFIG_DETECT_SOFTLOCKUP=y
CONFIG_SCHED_DEBUG=y
# CONFIG_SCHEDSTATS is not set
CONFIG_TIMER_STATS=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_DEBUG_PREEMPT=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_PI_LIST=y
# CONFIG_RT_MUTEX_TESTER is not set
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCKDEP=y
# CONFIG_LOCK_STAT is not set
# CONFIG_DEBUG_LOCKDEP is not set
CONFIG_TRACE_IRQFLAGS=y
CONFIG_DEBUG_SPINLOCK_SLEEP=y
# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
CONFIG_STACKTRACE=y
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_HIGHMEM is not set
CONFIG_DEBUG_BUGVERBOSE=y
CONFIG_DEBUG_INFO=y
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
CONFIG_FRAME_POINTER=y
# CONFIG_FORCED_INLINING is not set
# CONFIG_BOOT_PRINTK_DELAY is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_FAULT_INJECTION is not set
CONFIG_EARLY_PRINTK=y
CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_STACK_USAGE is not set

#
# Page alloc debug is incompatible with Software Suspend on i386
#
CONFIG_DEBUG_RODATA=y
# CONFIG_4KSTACKS is not set
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
CONFIG_DOUBLEFAULT=y

#
# Security options
#
# CONFIG_KEYS is not set
# CONFIG_SECURITY is not set
# CONFIG_SECURITY_FILE_CAPABILITIES is not set
CONFIG_XOR_BLOCKS=m
CONFIG_ASYNC_CORE=m
CONFIG_ASYNC_MEMCPY=m
CONFIG_ASYNC_XOR=m
CONFIG_CRYPTO=y
CONFIG_CRYPTO_ALGAPI=y
CONFIG_CRYPTO_BLKCIPHER=y
CONFIG_CRYPTO_HASH=y
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_XCBC is not set
# CONFIG_CRYPTO_NULL is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_SHA1=y
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_WP512 is not set
# CONFIG_CRYPTO_TGR192 is not set
# CONFIG_CRYPTO_GF128MUL is not set
# CONFIG_CRYPTO_ECB is not set
CONFIG_CRYPTO_CBC=y
# CONFIG_CRYPTO_PCBC is not set
# CONFIG_CRYPTO_LRW is not set
# CONFIG_CRYPTO_XTS is not set
# CONFIG_CRYPTO_CRYPTD is not set
CONFIG_CRYPTO_DES=y
# CONFIG_CRYPTO_FCRYPT is not set
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_TWOFISH is not set
# CONFIG_CRYPTO_TWOFISH_586 is not set
# CONFIG_CRYPTO_SERPENT is not set
# CONFIG_CRYPTO_AES is not set
# CONFIG_CRYPTO_AES_586 is not set
CONFIG_CRYPTO_CAST5=y
# CONFIG_CRYPTO_CAST6 is not set
# CONFIG_CRYPTO_TEA is not set
# CONFIG_CRYPTO_ARC4 is not set
# CONFIG_CRYPTO_KHAZAD is not set
# CONFIG_CRYPTO_ANUBIS is not set
# CONFIG_CRYPTO_SEED is not set
CONFIG_CRYPTO_DEFLATE=y
# CONFIG_CRYPTO_MICHAEL_MIC is not set
CONFIG_CRYPTO_CRC32C=y
# CONFIG_CRYPTO_CAMELLIA is not set
# CONFIG_CRYPTO_TEST is not set
# CONFIG_CRYPTO_AUTHENC is not set
# CONFIG_CRYPTO_HW is not set

#
# Library routines
#
CONFIG_BITREVERSE=y
# CONFIG_CRC_CCITT is not set
# CONFIG_CRC16 is not set
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=y
# CONFIG_CRC7 is not set
CONFIG_LIBCRC32C=y
CONFIG_ZLIB_INFLATE=y
CONFIG_ZLIB_DEFLATE=y
CONFIG_PLIST=y
CONFIG_HAS_IOMEM=y
CONFIG_HAS_IOPORT=y
CONFIG_HAS_DMA=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_X86_SMP=y
CONFIG_X86_HT=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
CONFIG_KTIME_SCALAR=y

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
  2007-10-22  6:22 100% iowait on one of cpus in current -git Maxim Levitsky
@ 2007-10-22  9:11 ` Paolo Ornati
  2007-10-22  9:43   ` Maxim Levitsky
  2007-10-22  9:41 ` Peter Zijlstra
  1 sibling, 1 reply; 61+ messages in thread
From: Paolo Ornati @ 2007-10-22  9:11 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: linux-kernel

On Mon, 22 Oct 2007 08:22:52 +0200
Maxim Levitsky <maximlevitsky@gmail.com> wrote:

> I tried to bisect this, but eventually I run into other bugs that cause system to oops early.

You can pick a different revision to test with:
	git-reset --hard  "SHA1"

Choose one with "git-bisect visualize".

-- 
	Paolo Ornati
	Linux 2.6.23-ge8b8c977 on x86_64

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
  2007-10-22  6:22 100% iowait on one of cpus in current -git Maxim Levitsky
  2007-10-22  9:11 ` Paolo Ornati
@ 2007-10-22  9:41 ` Peter Zijlstra
  2007-10-22  9:59   ` Maxim Levitsky
  1 sibling, 1 reply; 61+ messages in thread
From: Peter Zijlstra @ 2007-10-22  9:41 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: linux-kernel

On Mon, 2007-10-22 at 08:22 +0200, Maxim Levitsky wrote:
> Hi,
> 
> I found a bug in current -git:
> 
> On my system on of cpus stays 100% in iowait mode (I have core 2 duo)
> Otherwise the system works OK, no disk activity and/or slowdown.
> Suspecting that this is a swap-related problem I tried to turn swap of, but it doesn't affect anything.
> It is probably some accounting bug.
> 
> If I start with init=/bin/bash, then this disappears.
> I tried then to start usual /etc/init.d scripts then, and first one to show this bug was gpm.
> but then I rebooted the system to X without gpm, and I still see 100% iowait.
> 
> No additional messages in dmesg.

does sysrq-t show any D state tasks?


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
  2007-10-22  9:11 ` Paolo Ornati
@ 2007-10-22  9:43   ` Maxim Levitsky
  0 siblings, 0 replies; 61+ messages in thread
From: Maxim Levitsky @ 2007-10-22  9:43 UTC (permalink / raw)
  To: Paolo Ornati; +Cc: linux-kernel

On Monday 22 October 2007 11:11:52 Paolo Ornati wrote:
> On Mon, 22 Oct 2007 08:22:52 +0200
> Maxim Levitsky <maximlevitsky@gmail.com> wrote:
> 
> > I tried to bisect this, but eventually I run into other bugs that cause system to oops early.
> 
> You can pick a different revision to test with:
> 	git-reset --hard  "SHA1"
> 
> Choose one with "git-bisect visualize".
> 

Well, I know that, and I did try this a lot.

The problem is that between the good and bad revisions there are a few bugs that cause the system to oops early,
thus I can't tell whether the 100% iowait bug is present or not.

Best regards,
	Maxim Levitsky

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
  2007-10-22  9:41 ` Peter Zijlstra
@ 2007-10-22  9:59   ` Maxim Levitsky
  2007-10-22 10:22     ` Peter Zijlstra
  0 siblings, 1 reply; 61+ messages in thread
From: Maxim Levitsky @ 2007-10-22  9:59 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel

On Monday 22 October 2007 11:41:57 Peter Zijlstra wrote:
> On Mon, 2007-10-22 at 08:22 +0200, Maxim Levitsky wrote:
> > Hi,
> > 
> > I found a bug in current -git:
> > 
> > On my system on of cpus stays 100% in iowait mode (I have core 2 duo)
> > Otherwise the system works OK, no disk activity and/or slowdown.
> > Suspecting that this is a swap-related problem I tried to turn swap of, but it doesn't affect anything.
> > It is probably some accounting bug.
> > 
> > If I start with init=/bin/bash, then this disappears.
> > I tried then to start usual /etc/init.d scripts then, and first one to show this bug was gpm.
> > but then I rebooted the system to X without gpm, and I still see 100% iowait.
> > 
> > No additional messages in dmesg.
> 
> does sysrq-t show any D state tasks?
> 
> 
This one:
Probably per-block device dirty writeback?
I am now compiling revision 1f7d6668c29b1dfa307a44844f9bb38356fc989b
Thanks for the pointer.



[  673.365631] pdflush       D c21bdecc     0   221      2
[  673.365635]        c21bdee0 00000046 00000002 c21bdecc c21bdec4 00000000 c21b3000 00000002
[  673.365643]        c0134892 c21b3164 c1e00200 00000001 c7109280 c21bdec0 c03ff849 c21bdef0
[  673.365650]        00052974 00000000 000000ff 00000000 00000000 00000000 c21bdef0 000529dc
[  673.365657] Call Trace:
[  673.365659]  [<c03fd728>] schedule_timeout+0x48/0xc0
[  673.365663]  [<c03fd50e>] io_schedule_timeout+0x5e/0xb0
[  673.365667]  [<c0170d11>] congestion_wait+0x71/0x90
[  673.365671]  [<c016b92e>] wb_kupdate+0x9e/0xf0
[  673.365675]  [<c016beb2>] pdflush+0x102/0x1d0
[  673.365679]  [<c013fa82>] kthread+0x42/0x70
[  673.365683]  [<c01050df>] kernel_thread_helper+0x7/0x18


Best regards,
	Maxim Levitsky

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
  2007-10-22  9:59   ` Maxim Levitsky
@ 2007-10-22 10:22     ` Peter Zijlstra
  2007-10-22 10:40       ` Maxim Levitsky
  0 siblings, 1 reply; 61+ messages in thread
From: Peter Zijlstra @ 2007-10-22 10:22 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: linux-kernel, Fengguang Wu, Andrew Morton

On Mon, 2007-10-22 at 11:59 +0200, Maxim Levitsky wrote:
> On Monday 22 October 2007 11:41:57 Peter Zijlstra wrote:
> > On Mon, 2007-10-22 at 08:22 +0200, Maxim Levitsky wrote:
> > > Hi,
> > > 
> > > I found a bug in current -git:
> > > 
> > > On my system on of cpus stays 100% in iowait mode (I have core 2 duo)
> > > Otherwise the system works OK, no disk activity and/or slowdown.
> > > Suspecting that this is a swap-related problem I tried to turn swap of, but it doesn't affect anything.
> > > It is probably some accounting bug.
> > > 
> > > If I start with init=/bin/bash, then this disappears.
> > > I tried then to start usual /etc/init.d scripts then, and first one to show this bug was gpm.
> > > but then I rebooted the system to X without gpm, and I still see 100% iowait.
> > > 
> > > No additional messages in dmesg.
> > 
> > does sysrq-t show any D state tasks?
> > 
> > 
> This one:
> Probably per-block device dirty writeback?
> I am compiling now revision 1f7d6668c29b1dfa307a44844f9bb38356fc989b
> Thanks for the pointer.
> 
> 
> 
> [  673.365631] pdflush       D c21bdecc     0   221      2
> [  673.365635]        c21bdee0 00000046 00000002 c21bdecc c21bdec4 00000000 c21b3000 00000002
> [  673.365643]        c0134892 c21b3164 c1e00200 00000001 c7109280 c21bdec0 c03ff849 c21bdef0
> [  673.365650]        00052974 00000000 000000ff 00000000 00000000 00000000 c21bdef0 000529dc
> [  673.365657] Call Trace:
> [  673.365659]  [<c03fd728>] schedule_timeout+0x48/0xc0
> [  673.365663]  [<c03fd50e>] io_schedule_timeout+0x5e/0xb0
> [  673.365667]  [<c0170d11>] congestion_wait+0x71/0x90
> [  673.365671]  [<c016b92e>] wb_kupdate+0x9e/0xf0
> [  673.365675]  [<c016beb2>] pdflush+0x102/0x1d0
> [  673.365679]  [<c013fa82>] kthread+0x42/0x70
> [  673.365683]  [<c01050df>] kernel_thread_helper+0x7/0x18
> 

That looks more like the inode writeback patches from Wu than the per-bdi
dirty stuff. The latter typically hangs in balance_dirty_pages().



^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
  2007-10-22 10:22     ` Peter Zijlstra
@ 2007-10-22 10:40       ` Maxim Levitsky
       [not found]         ` <393050530.03287@ustc.edu.cn>
  0 siblings, 1 reply; 61+ messages in thread
From: Maxim Levitsky @ 2007-10-22 10:40 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, Fengguang Wu, Andrew Morton

On Monday 22 October 2007 12:22:10 Peter Zijlstra wrote:
> On Mon, 2007-10-22 at 11:59 +0200, Maxim Levitsky wrote:
> > On Monday 22 October 2007 11:41:57 Peter Zijlstra wrote:
> > > On Mon, 2007-10-22 at 08:22 +0200, Maxim Levitsky wrote:
> > > > Hi,
> > > > 
> > > > I found a bug in current -git:
> > > > 
> > > > On my system on of cpus stays 100% in iowait mode (I have core 2 duo)
> > > > Otherwise the system works OK, no disk activity and/or slowdown.
> > > > Suspecting that this is a swap-related problem I tried to turn swap of, but it doesn't affect anything.
> > > > It is probably some accounting bug.
> > > > 
> > > > If I start with init=/bin/bash, then this disappears.
> > > > I tried then to start usual /etc/init.d scripts then, and first one to show this bug was gpm.
> > > > but then I rebooted the system to X without gpm, and I still see 100% iowait.
> > > > 
> > > > No additional messages in dmesg.
> > > 
> > > does sysrq-t show any D state tasks?
> > > 
> > > 
> > This one:
> > Probably per-block device dirty writeback?
> > I am compiling now revision 1f7d6668c29b1dfa307a44844f9bb38356fc989b
> > Thanks for the pointer.
> > 
> > 
> > 
> > [  673.365631] pdflush       D c21bdecc     0   221      2
> > [  673.365635]        c21bdee0 00000046 00000002 c21bdecc c21bdec4 00000000 c21b3000 00000002
> > [  673.365643]        c0134892 c21b3164 c1e00200 00000001 c7109280 c21bdec0 c03ff849 c21bdef0
> > [  673.365650]        00052974 00000000 000000ff 00000000 00000000 00000000 c21bdef0 000529dc
> > [  673.365657] Call Trace:
> > [  673.365659]  [<c03fd728>] schedule_timeout+0x48/0xc0
> > [  673.365663]  [<c03fd50e>] io_schedule_timeout+0x5e/0xb0
> > [  673.365667]  [<c0170d11>] congestion_wait+0x71/0x90
> > [  673.365671]  [<c016b92e>] wb_kupdate+0x9e/0xf0
> > [  673.365675]  [<c016beb2>] pdflush+0x102/0x1d0
> > [  673.365679]  [<c013fa82>] kthread+0x42/0x70
> > [  673.365683]  [<c01050df>] kernel_thread_helper+0x7/0x18
> > 
> 
> That looks more like the inode writeback patches from Wu than the per
> bdi dirty stuff. The later typically hangs in balance_dirty_pages().
> 
> 
> 

Yes, you are right,

Both revisions 1f7d6668c29b1dfa307a44844f9bb38356fc989b and 3e26c149c358529b1605f8959341d34bc4b880a3 work fine,
but I didn't notice that those are before f4a1c2bce002f683801bcdbbc9fd89804614fb6b.
So, back to the drawing board.... :-)

Will test revision 2e6883bdf49abd0e7f0d9b6297fc3be7ebb2250b, just after writeback patches.
Thanks,
	Best regards,
		Maxim Levitsky
		

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
       [not found]         ` <393050530.03287@ustc.edu.cn>
@ 2007-10-22 10:55           ` Fengguang Wu
  2007-10-22 10:58           ` Maxim Levitsky
  1 sibling, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-10-22 10:55 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: Peter Zijlstra, linux-kernel, Fengguang Wu, Andrew Morton

On Mon, Oct 22, 2007 at 12:40:24PM +0200, Maxim Levitsky wrote:
> On Monday 22 October 2007 12:22:10 Peter Zijlstra wrote:
> > > [  673.365631] pdflush       D c21bdecc     0   221      2
> > > [  673.365635]        c21bdee0 00000046 00000002 c21bdecc c21bdec4 00000000 c21b3000 00000002
> > > [  673.365643]        c0134892 c21b3164 c1e00200 00000001 c7109280 c21bdec0 c03ff849 c21bdef0
> > > [  673.365650]        00052974 00000000 000000ff 00000000 00000000 00000000 c21bdef0 000529dc
> > > [  673.365657] Call Trace:
> > > [  673.365659]  [<c03fd728>] schedule_timeout+0x48/0xc0
> > > [  673.365663]  [<c03fd50e>] io_schedule_timeout+0x5e/0xb0
> > > [  673.365667]  [<c0170d11>] congestion_wait+0x71/0x90
> > > [  673.365671]  [<c016b92e>] wb_kupdate+0x9e/0xf0
> > > [  673.365675]  [<c016beb2>] pdflush+0x102/0x1d0
> > > [  673.365679]  [<c013fa82>] kthread+0x42/0x70
> > > [  673.365683]  [<c01050df>] kernel_thread_helper+0x7/0x18
> > > 
> > 
> > That looks more like the inode writeback patches from Wu than the per
> > bdi dirty stuff. The later typically hangs in balance_dirty_pages().
> > 
> > 
> > 
> 
> Yes, you are right,
> 
> both revisions 1f7d6668c29b1dfa307a44844f9bb38356fc989b and 3e26c149c358529b1605f8959341d34bc4b880a3 work fine
> But I didn't pay attention that those are before f4a1c2bce002f683801bcdbbc9fd89804614fb6b.
> So, back to the drawing board.... :-)
> 
> Will test revision 2e6883bdf49abd0e7f0d9b6297fc3be7ebb2250b, just after writeback patches.

Thank you. I'll see if I can reproduce it locally...

Fengguang


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
       [not found]         ` <393050530.03287@ustc.edu.cn>
  2007-10-22 10:55           ` Fengguang Wu
@ 2007-10-22 10:58           ` Maxim Levitsky
       [not found]             ` <393051953.24752@ustc.edu.cn>
  1 sibling, 1 reply; 61+ messages in thread
From: Maxim Levitsky @ 2007-10-22 10:58 UTC (permalink / raw)
  To: Fengguang Wu; +Cc: Peter Zijlstra, linux-kernel, Fengguang Wu, Andrew Morton

On Monday 22 October 2007 12:55:25 Fengguang Wu wrote:
> On Mon, Oct 22, 2007 at 12:40:24PM +0200, Maxim Levitsky wrote:
> > On Monday 22 October 2007 12:22:10 Peter Zijlstra wrote:
> > > > [  673.365631] pdflush       D c21bdecc     0   221      2
> > > > [  673.365635]        c21bdee0 00000046 00000002 c21bdecc c21bdec4 00000000 c21b3000 00000002
> > > > [  673.365643]        c0134892 c21b3164 c1e00200 00000001 c7109280 c21bdec0 c03ff849 c21bdef0
> > > > [  673.365650]        00052974 00000000 000000ff 00000000 00000000 00000000 c21bdef0 000529dc
> > > > [  673.365657] Call Trace:
> > > > [  673.365659]  [<c03fd728>] schedule_timeout+0x48/0xc0
> > > > [  673.365663]  [<c03fd50e>] io_schedule_timeout+0x5e/0xb0
> > > > [  673.365667]  [<c0170d11>] congestion_wait+0x71/0x90
> > > > [  673.365671]  [<c016b92e>] wb_kupdate+0x9e/0xf0
> > > > [  673.365675]  [<c016beb2>] pdflush+0x102/0x1d0
> > > > [  673.365679]  [<c013fa82>] kthread+0x42/0x70
> > > > [  673.365683]  [<c01050df>] kernel_thread_helper+0x7/0x18
> > > > 
> > > 
> > > That looks more like the inode writeback patches from Wu than the per
> > > bdi dirty stuff. The later typically hangs in balance_dirty_pages().
> > > 
> > > 
> > > 
> > 
> > Yes, you are right,
> > 
> > both revisions 1f7d6668c29b1dfa307a44844f9bb38356fc989b and 3e26c149c358529b1605f8959341d34bc4b880a3 work fine
> > But I didn't pay attention that those are before f4a1c2bce002f683801bcdbbc9fd89804614fb6b.
> > So, back to the drawing board.... :-)
> > 
> > Will test revision 2e6883bdf49abd0e7f0d9b6297fc3be7ebb2250b, just after writeback patches.
> 
> Thank you. I'll try if I can reproduce it locally...
> 
> Fengguang
> 
> 

Bingo,

Revision 2e6883bdf49abd0e7f0d9b6297fc3be7ebb2250b shows this bug.

I will now bisect to find the exact patch that caused this bug,
Thanks,
	Maxim Levitsky

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
       [not found]             ` <393051953.24752@ustc.edu.cn>
@ 2007-10-22 11:19               ` Fengguang Wu
  2007-10-22 12:21               ` Maxim Levitsky
  1 sibling, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-10-22 11:19 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: Peter Zijlstra, linux-kernel, Fengguang Wu, Andrew Morton

On Mon, Oct 22, 2007 at 12:58:11PM +0200, Maxim Levitsky wrote:
> On Monday 22 October 2007 12:55:25 Fengguang Wu wrote:
> > On Mon, Oct 22, 2007 at 12:40:24PM +0200, Maxim Levitsky wrote:
> > > On Monday 22 October 2007 12:22:10 Peter Zijlstra wrote:
> > > > > [  673.365631] pdflush       D c21bdecc     0   221      2
> > > > > [  673.365635]        c21bdee0 00000046 00000002 c21bdecc c21bdec4 00000000 c21b3000 00000002
> > > > > [  673.365643]        c0134892 c21b3164 c1e00200 00000001 c7109280 c21bdec0 c03ff849 c21bdef0
> > > > > [  673.365650]        00052974 00000000 000000ff 00000000 00000000 00000000 c21bdef0 000529dc
> > > > > [  673.365657] Call Trace:
> > > > > [  673.365659]  [<c03fd728>] schedule_timeout+0x48/0xc0
> > > > > [  673.365663]  [<c03fd50e>] io_schedule_timeout+0x5e/0xb0
> > > > > [  673.365667]  [<c0170d11>] congestion_wait+0x71/0x90
> > > > > [  673.365671]  [<c016b92e>] wb_kupdate+0x9e/0xf0
> > > > > [  673.365675]  [<c016beb2>] pdflush+0x102/0x1d0
> > > > > [  673.365679]  [<c013fa82>] kthread+0x42/0x70
> > > > > [  673.365683]  [<c01050df>] kernel_thread_helper+0x7/0x18
> > > > > 
> > > > 
> > > > That looks more like the inode writeback patches from Wu than the per
> > > > bdi dirty stuff. The later typically hangs in balance_dirty_pages().
> > > > 
> > > > 
> > > > 
> > > 
> > > Yes, you are right,
> > > 
> > > both revisions 1f7d6668c29b1dfa307a44844f9bb38356fc989b and 3e26c149c358529b1605f8959341d34bc4b880a3 work fine
> > > But I didn't pay attention that those are before f4a1c2bce002f683801bcdbbc9fd89804614fb6b.
> > > So, back to the drawing board.... :-)
> > > 
> > > Will test revision 2e6883bdf49abd0e7f0d9b6297fc3be7ebb2250b, just after writeback patches.
> > 
> > Thank you. I'll try if I can reproduce it locally...
> > 
> > Fengguang
> > 
> > 
> 
> Bingo,
> 
> Revision 2e6883bdf49abd0e7f0d9b6297fc3be7ebb2250b shows this bug.
> 
> I will now bisect to find exact patch that caused this bug,

This one is most relevant:

1f7decf6d9f06dac008b8d66935c0c3b18e564f9
writeback: introduce writeback_control.more_io to indicate more io

Still compiling the kernel...

Thank you,
Fengguang


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
       [not found]             ` <393051953.24752@ustc.edu.cn>
  2007-10-22 11:19               ` Fengguang Wu
@ 2007-10-22 12:21               ` Maxim Levitsky
       [not found]                 ` <393056632.00561@ustc.edu.cn>
       [not found]                 ` <393126119.26275@ustc.edu.cn>
  1 sibling, 2 replies; 61+ messages in thread
From: Maxim Levitsky @ 2007-10-22 12:21 UTC (permalink / raw)
  To: Fengguang Wu; +Cc: Peter Zijlstra, linux-kernel, Fengguang Wu, Andrew Morton

On Monday 22 October 2007 13:19:08 Fengguang Wu wrote:
> On Mon, Oct 22, 2007 at 12:58:11PM +0200, Maxim Levitsky wrote:
> > On Monday 22 October 2007 12:55:25 Fengguang Wu wrote:
> > > On Mon, Oct 22, 2007 at 12:40:24PM +0200, Maxim Levitsky wrote:
> > > > On Monday 22 October 2007 12:22:10 Peter Zijlstra wrote:
> > > > > > [  673.365631] pdflush       D c21bdecc     0   221      2
> > > > > > [  673.365635]        c21bdee0 00000046 00000002 c21bdecc c21bdec4 00000000 c21b3000 00000002
> > > > > > [  673.365643]        c0134892 c21b3164 c1e00200 00000001 c7109280 c21bdec0 c03ff849 c21bdef0
> > > > > > [  673.365650]        00052974 00000000 000000ff 00000000 00000000 00000000 c21bdef0 000529dc
> > > > > > [  673.365657] Call Trace:
> > > > > > [  673.365659]  [<c03fd728>] schedule_timeout+0x48/0xc0
> > > > > > [  673.365663]  [<c03fd50e>] io_schedule_timeout+0x5e/0xb0
> > > > > > [  673.365667]  [<c0170d11>] congestion_wait+0x71/0x90
> > > > > > [  673.365671]  [<c016b92e>] wb_kupdate+0x9e/0xf0
> > > > > > [  673.365675]  [<c016beb2>] pdflush+0x102/0x1d0
> > > > > > [  673.365679]  [<c013fa82>] kthread+0x42/0x70
> > > > > > [  673.365683]  [<c01050df>] kernel_thread_helper+0x7/0x18
> > > > > > 
> > > > > 
> > > > > That looks more like the inode writeback patches from Wu than the per
> > > > > bdi dirty stuff. The later typically hangs in balance_dirty_pages().
> > > > > 
> > > > > 
> > > > > 
> > > > 
> > > > Yes, you are right,
> > > > 
> > > > both revisions 1f7d6668c29b1dfa307a44844f9bb38356fc989b and 3e26c149c358529b1605f8959341d34bc4b880a3 work fine
> > > > But I didn't pay attention that those are before f4a1c2bce002f683801bcdbbc9fd89804614fb6b.
> > > > So, back to the drawing board.... :-)
> > > > 
> > > > Will test revision 2e6883bdf49abd0e7f0d9b6297fc3be7ebb2250b, just after writeback patches.
> > > 
> > > Thank you. I'll try if I can reproduce it locally...
> > > 
> > > Fengguang
> > > 
> > > 
> > 
> > Bingo,
> > 
> > Revision 2e6883bdf49abd0e7f0d9b6297fc3be7ebb2250b shows this bug.
> > 
> > I will now bisect to find exact patch that caused this bug,
> 
> This one is most relevant:
> 
> 1f7decf6d9f06dac008b8d66935c0c3b18e564f9
> writeback: introduce writeback_control.more_io to indicate more io
Exactly.
> 
> Still compiling the kernel...
> 
> Thank you,
> Fengguang
> 
> 
Hi,


I bisected this bug to exactly this commit:

2e6883bdf49abd0e7f0d9b6297fc3be7ebb2250b
writeback: introduce writeback_control.more_io to indicate more io

Reverting it and compiling latest git shows no more signs of that bug.
Thanks,
	Best regards,
		Maxim Levitsky

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
       [not found]                 ` <393056632.00561@ustc.edu.cn>
@ 2007-10-22 12:37                   ` Fengguang Wu
  2007-10-22 13:05                   ` Maxim Levitsky
  1 sibling, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-10-22 12:37 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: Peter Zijlstra, linux-kernel, Andrew Morton

On Mon, Oct 22, 2007 at 02:21:21PM +0200, Maxim Levitsky wrote:
> I Bisected this bug to exactly this commit:
> 
> 2e6883bdf49abd0e7f0d9b6297fc3be7ebb2250b
> writeback: introduce writeback_control.more_io to indicate more io
> 
> Reverting it and compiling latest git shows no more signs of that bug.

Thank you very much.

I guess your system has some difficulty in writing back some inodes.
(i.e. a bug disclosed by this patch, the 100% iowait only makes it
more obvious)

I cannot reproduce it with your .config, so would you recompile and
run the kernel with the above commit _and_ the below debugging patch?

Thank you,
Fengguang
---

 fs/fs-writeback.c |   15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

--- linux-2.6.23-git17.orig/fs/fs-writeback.c
+++ linux-2.6.23-git17/fs/fs-writeback.c
@@ -164,12 +164,25 @@ static void redirty_tail(struct inode *i
 	list_move(&inode->i_list, &sb->s_dirty);
 }
 
+#define requeue_io(inode)						\
+	do {								\
+		__requeue_io(inode, __LINE__);				\
+	} while (0)
+
 /*
  * requeue inode for re-scanning after sb->s_io list is exhausted.
  */
-static void requeue_io(struct inode *inode)
+static void __requeue_io(struct inode *inode, int line)
 {
 	list_move(&inode->i_list, &inode->i_sb->s_more_io);
+
+	printk(KERN_DEBUG "redirtied inode %lu size %llu at %02x:%02x(%s), line %d.\n",
+			inode->i_ino,
+			i_size_read(inode),
+			MAJOR(inode->i_sb->s_dev),
+			MINOR(inode->i_sb->s_dev),
+			inode->i_sb->s_id,
+			line);
 }
 
 static void inode_sync_complete(struct inode *inode)


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
       [not found]                 ` <393056632.00561@ustc.edu.cn>
  2007-10-22 12:37                   ` Fengguang Wu
@ 2007-10-22 13:05                   ` Maxim Levitsky
       [not found]                     ` <393058650.02921@ustc.edu.cn>
  1 sibling, 1 reply; 61+ messages in thread
From: Maxim Levitsky @ 2007-10-22 13:05 UTC (permalink / raw)
  To: Fengguang Wu; +Cc: Peter Zijlstra, linux-kernel, Andrew Morton

On Monday 22 October 2007 14:37:07 Fengguang Wu wrote:
> On Mon, Oct 22, 2007 at 02:21:21PM +0200, Maxim Levitsky wrote:
> > I Bisected this bug to exactly this commit:
> > 
> > 2e6883bdf49abd0e7f0d9b6297fc3be7ebb2250b
> > writeback: introduce writeback_control.more_io to indicate more io
> > 
> > Reverting it and compiling latest git shows no more signs of that bug.
> 
> Thank you very much.
> 
> I guess your system has some difficulty in writing back some inodes.
> (i.e. a bug disclosed by this patch, the 100% iowait only makes it
> more obvious)
> 
> I cannot reproduce it with your .config, so would you recompile and
> run the kernel with the above commit _and_ the below debugging patch?
> 
> Thank you,
> Fengguang
> ---
> 
>  fs/fs-writeback.c |   15 ++++++++++++++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
> 
> --- linux-2.6.23-git17.orig/fs/fs-writeback.c
> +++ linux-2.6.23-git17/fs/fs-writeback.c
> @@ -164,12 +164,25 @@ static void redirty_tail(struct inode *i
>  	list_move(&inode->i_list, &sb->s_dirty);
>  }
>  
> +#define requeue_io(inode)						\
> +	do {								\
> +		__requeue_io(inode, __LINE__);				\
> +	} while (0)
> +
>  /*
>   * requeue inode for re-scanning after sb->s_io list is exhausted.
>   */
> -static void requeue_io(struct inode *inode)
> +static void __requeue_io(struct inode *inode, int line)
>  {
>  	list_move(&inode->i_list, &inode->i_sb->s_more_io);
> +
> +	printk(KERN_DEBUG "redirtied inode %lu size %llu at %02x:%02x(%s), line %d.\n",
> +			inode->i_ino,
> +			i_size_read(inode),
> +			MAJOR(inode->i_sb->s_dev),
> +			MINOR(inode->i_sb->s_dev),
> +			inode->i_sb->s_id,
> +			line);
>  }
>  
>  static void inode_sync_complete(struct inode *inode)
> 
> 

Hi,
Thank you very much too, for helping me.


Applied.
Had to kill klogd, since the kernel generates tons of redirtied inode messages.
Size of the kern.log is 863 KB, thus I don't think it is polite to attach it.
Don't know whether it is ok to put it on pastebin either.

Anyway, it shows lots of redirtied inode... messages,
and while most of them are "at 08:02(sda2)" , my reiserfs root partition, some are

"Oct 22 14:50:27 MAIN kernel: [   73.643794] redirtied inode 2582 size 0 at 00:0f(tmpfs), line 300."

"line 300" is always shown

(I have /var/run, /var/lock, /dev mounted as tmpfs, default kubuntu setup)


Best regards,
	Maxim Levitsky

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
       [not found]                     ` <393058650.02921@ustc.edu.cn>
@ 2007-10-22 13:10                       ` Fengguang Wu
       [not found]                         ` <393060478.03650@ustc.edu.cn>
  2007-10-22 13:28                       ` 100% iowait on one of cpus " Maxim Levitsky
  1 sibling, 1 reply; 61+ messages in thread
From: Fengguang Wu @ 2007-10-22 13:10 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: Peter Zijlstra, linux-kernel, Andrew Morton

On Mon, Oct 22, 2007 at 03:05:35PM +0200, Maxim Levitsky wrote:
> Hi,
> Thank you very much too, for helping me.

You are welcome :-)

> Applied.
> Had to kill klogd, since kernel generates tons of redirtied inode messages.
> Size of the kern.log is 863 KB, thus I I don't think it is polite to attachit.
> Don't know whenever it is ok to put it on pastebin too.
> 
> Anyway, it shows lots of redirtied inode... messages,
> and while most of them are "at 08:02(sda2)" , my reiserfs root partition, some are
> 
> "Oct 22 14:50:27 MAIN kernel: [   73.643794] redirtied inode 2582 size 0 at 00:0f(tmpfs), line 300."
> 
> " line 300" is shown always
> 
> (I have /var/run, /var/lock, /dev mounted as tmpfs, default kubuntu setup)

Thank you for testing it out.

Hmm, maybe it's a reiserfs-related issue.  Do you have the full log file?

Thank you,
Fengguang


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
       [not found]                     ` <393058650.02921@ustc.edu.cn>
  2007-10-22 13:10                       ` Fengguang Wu
@ 2007-10-22 13:28                       ` Maxim Levitsky
  1 sibling, 0 replies; 61+ messages in thread
From: Maxim Levitsky @ 2007-10-22 13:28 UTC (permalink / raw)
  To: Fengguang Wu; +Cc: Peter Zijlstra, linux-kernel, Andrew Morton

On Monday 22 October 2007 15:10:45 Fengguang Wu wrote:
> On Mon, Oct 22, 2007 at 03:05:35PM +0200, Maxim Levitsky wrote:
> > Hi,
> > Thank you very much too, for helping me.
> 
> You are welcome :-)
> 
> > Applied.
> > Had to kill klogd, since kernel generates tons of redirtied inode messages.
> > Size of the kern.log is 863 KB, thus I I don't think it is polite to attachit.
> > Don't know whenever it is ok to put it on pastebin too.
> > 
> > Anyway, it shows lots of redirtied inode... messages,
> > and while most of them are "at 08:02(sda2)" , my reiserfs root partition, some are
> > 
> > "Oct 22 14:50:27 MAIN kernel: [   73.643794] redirtied inode 2582 size 0 at 00:0f(tmpfs), line 300."
> > 
> > " line 300" is shown always
> > 
> > (I have /var/run, /var/lock, /dev mounted as tmpfs, default kubuntu setup)
> 
> Thank you for the testing out.
> 
> Hmm, Maybe it's an reiserfs related issue.  Do you have the full log file?
No, I don't think so; like I said, it sometimes shows the same message on tmpfs.
> 
> Thank you,
> Fengguang
> 
> 
Best Regards,
	Maxim Levitsky

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
       [not found]                         ` <393060478.03650@ustc.edu.cn>
@ 2007-10-22 13:41                           ` Fengguang Wu
  2007-10-31 15:22                           ` Torsten Kaiser
  1 sibling, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-10-22 13:41 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: Peter Zijlstra, linux-kernel, Andrew Morton

On Mon, Oct 22, 2007 at 09:10:45PM +0800, Fengguang Wu wrote:
> Hmm, Maybe it's an reiserfs related issue.  Do you have the full log file?

Bingo! It can be reproduced in -mm on reiserfs:

# mkfs.reiserfs /dev/sdb1
# mount /dev/sdb1 /test
# cp bin /test
<wait for a while>
# dmesg
[...]
[  418.346113] requeue_io 308: inode 6 size 302 at 08:11(sdb1)
[  418.346119] requeue_io 308: inode 7 size 196 at 08:11(sdb1)
[  418.346125] requeue_io 308: inode 8 size 85 at 08:11(sdb1)
[  418.346131] requeue_io 308: inode 9 size 180 at 08:11(sdb1)
[  418.346136] requeue_io 308: inode 10 size 1488 at 08:11(sdb1)
[  418.346142] requeue_io 308: inode 12 size 1358 at 08:11(sdb1)
[  418.346148] requeue_io 308: inode 13 size 482 at 08:11(sdb1)
[  418.346153] requeue_io 308: inode 14 size 171 at 08:11(sdb1)
[  418.346159] requeue_io 308: inode 15 size 93 at 08:11(sdb1)
[  418.346164] requeue_io 308: inode 16 size 81 at 08:11(sdb1)
[  418.346170] requeue_io 308: inode 17 size 212 at 08:11(sdb1)
[  418.346176] requeue_io 308: inode 18 size 431 at 08:11(sdb1)
[  418.346181] requeue_io 308: inode 19 size 231 at 08:11(sdb1)
[  418.346187] requeue_io 308: inode 20 size 1756 at 08:11(sdb1)
[  418.346193] requeue_io 308: inode 21 size 1229 at 08:11(sdb1)
[  418.346198] requeue_io 308: inode 22 size 157 at 08:11(sdb1)
[  418.346204] requeue_io 308: inode 23 size 3430 at 08:11(sdb1)
[  418.346210] requeue_io 308: inode 24 size 200 at 08:11(sdb1)
[  418.346215] requeue_io 308: inode 25 size 202 at 08:11(sdb1)
[  418.346221] requeue_io 308: inode 26 size 386 at 08:11(sdb1)
[  418.346226] requeue_io 308: inode 27 size 264 at 08:11(sdb1)
[  418.346232] requeue_io 308: inode 28 size 268 at 08:11(sdb1)
[  418.346238] requeue_io 308: inode 29 size 1228 at 08:11(sdb1)
[  418.346243] requeue_io 308: inode 30 size 404 at 08:11(sdb1)
[  418.346249] requeue_io 308: inode 31 size 2452 at 08:11(sdb1)
[  418.346255] requeue_io 308: inode 32 size 1236 at 08:11(sdb1)
[  418.346260] requeue_io 308: inode 33 size 655 at 08:11(sdb1)
[  418.346266] requeue_io 308: inode 35 size 330 at 08:11(sdb1)
[  418.346272] requeue_io 308: inode 36 size 248 at 08:11(sdb1)
[  418.346277] requeue_io 308: inode 37 size 683 at 08:11(sdb1)
[  418.346283] requeue_io 308: inode 38 size 1451 at 08:11(sdb1)
[  418.346288] requeue_io 308: inode 39 size 894 at 08:11(sdb1)
[  418.346294] requeue_io 308: inode 40 size 879 at 08:11(sdb1)
[  418.346300] requeue_io 308: inode 42 size 797 at 08:11(sdb1)
[  418.346305] requeue_io 308: inode 43 size 1314 at 08:11(sdb1)
[  418.346311] requeue_io 308: inode 44 size 1463 at 08:11(sdb1)
[  418.346317] requeue_io 308: inode 45 size 3032 at 08:11(sdb1)
[  418.346322] requeue_io 308: inode 46 size 325 at 08:11(sdb1)
[  418.346328] requeue_io 308: inode 47 size 583 at 08:11(sdb1)
[  418.346334] requeue_io 308: inode 48 size 1660 at 08:11(sdb1)
[  418.346339] requeue_io 308: inode 49 size 3159 at 08:11(sdb1)
[  418.346345] requeue_io 308: inode 50 size 510 at 08:11(sdb1)
[  418.346350] requeue_io 308: inode 51 size 100 at 08:11(sdb1)
[  418.346356] requeue_io 308: inode 52 size 143 at 08:11(sdb1)
[  418.346370] requeue_io 308: inode 53 size 954 at 08:11(sdb1)
[  418.346373] requeue_io 308: inode 54 size 322 at 08:11(sdb1)
[  418.346376] requeue_io 308: inode 55 size 970 at 08:11(sdb1)
[  418.346379] requeue_io 308: inode 57 size 483 at 08:11(sdb1)
[  418.346382] requeue_io 308: inode 58 size 1125 at 08:11(sdb1)
[  418.346385] requeue_io 308: inode 59 size 2196 at 08:11(sdb1)
[  418.346388] requeue_io 308: inode 60 size 104 at 08:11(sdb1)
[  418.346391] requeue_io 308: inode 61 size 488 at 08:11(sdb1)
[  418.346394] requeue_io 308: inode 62 size 116 at 08:11(sdb1)
[  418.346397] requeue_io 308: inode 63 size 907 at 08:11(sdb1)
[  418.346400] requeue_io 308: inode 64 size 1076 at 08:11(sdb1)
[  418.346403] requeue_io 308: inode 65 size 460 at 08:11(sdb1)
[  418.346406] requeue_io 308: inode 66 size 1092 at 08:11(sdb1)
[  418.346409] requeue_io 308: inode 67 size 424 at 08:11(sdb1)
[  418.346412] requeue_io 308: inode 68 size 696 at 08:11(sdb1)
[  418.346415] requeue_io 308: inode 70 size 137 at 08:11(sdb1)
[  418.346418] requeue_io 308: inode 71 size 201 at 08:11(sdb1)
[  418.346421] requeue_io 308: inode 72 size 150 at 08:11(sdb1)
[  418.346424] requeue_io 308: inode 73 size 188 at 08:11(sdb1)
[  418.346427] requeue_io 308: inode 75 size 1208 at 08:11(sdb1)
[  418.346431] requeue_io 308: inode 76 size 493 at 08:11(sdb1)
[  418.346434] requeue_io 308: inode 77 size 484 at 08:11(sdb1)
[  418.346437] requeue_io 308: inode 78 size 356 at 08:11(sdb1)
[  418.346440] requeue_io 308: inode 79 size 895 at 08:11(sdb1)
[  418.346443] requeue_io 308: inode 80 size 847 at 08:11(sdb1)
[  418.346446] requeue_io 308: inode 81 size 3281 at 08:11(sdb1)
[  418.346449] requeue_io 308: inode 82 size 3329 at 08:11(sdb1)
[  418.346452] requeue_io 308: inode 83 size 115 at 08:11(sdb1)
[  418.346455] requeue_io 308: inode 84 size 644 at 08:11(sdb1)
[  418.346458] requeue_io 308: inode 85 size 125 at 08:11(sdb1)
[  418.346461] requeue_io 308: inode 86 size 199 at 08:11(sdb1)
[  418.346464] requeue_io 308: inode 87 size 204 at 08:11(sdb1)
[  418.346467] requeue_io 308: inode 88 size 72 at 08:11(sdb1)
[  418.346476] mm/page-writeback.c 658 wb_kupdate: pdflush(209) 17174 global 2012 0 0 wc _M tw 1024 sk 0
[  418.366318] requeue_io 308: inode 6 size 302 at 08:11(sdb1)
[  418.366325] requeue_io 308: inode 7 size 196 at 08:11(sdb1)
[  418.366330] requeue_io 308: inode 8 size 85 at 08:11(sdb1)
[  418.366334] requeue_io 308: inode 9 size 180 at 08:11(sdb1)
[  418.366338] requeue_io 308: inode 10 size 1488 at 08:11(sdb1)
[  418.366342] requeue_io 308: inode 12 size 1358 at 08:11(sdb1)
[  418.366346] requeue_io 308: inode 13 size 482 at 08:11(sdb1)
[  418.366350] requeue_io 308: inode 14 size 171 at 08:11(sdb1)
[  418.366354] requeue_io 308: inode 15 size 93 at 08:11(sdb1)
[  418.366358] requeue_io 308: inode 16 size 81 at 08:11(sdb1)
[  418.366361] requeue_io 308: inode 17 size 212 at 08:11(sdb1)
[  418.366365] requeue_io 308: inode 18 size 431 at 08:11(sdb1)
[  418.366369] requeue_io 308: inode 19 size 231 at 08:11(sdb1)
[  418.366373] requeue_io 308: inode 20 size 1756 at 08:11(sdb1)
[  418.366378] requeue_io 308: inode 21 size 1229 at 08:11(sdb1)
[  418.366382] requeue_io 308: inode 22 size 157 at 08:11(sdb1)
[  418.366386] requeue_io 308: inode 23 size 3430 at 08:11(sdb1)
[  418.366390] requeue_io 308: inode 24 size 200 at 08:11(sdb1)
[  418.366394] requeue_io 308: inode 25 size 202 at 08:11(sdb1)
[  418.366398] requeue_io 308: inode 26 size 386 at 08:11(sdb1)
[  418.366402] requeue_io 308: inode 27 size 264 at 08:11(sdb1)
[  418.366407] requeue_io 308: inode 28 size 268 at 08:11(sdb1)
[  418.366411] requeue_io 308: inode 29 size 1228 at 08:11(sdb1)
[  418.366415] requeue_io 308: inode 30 size 404 at 08:11(sdb1)
[  418.366419] requeue_io 308: inode 31 size 2452 at 08:11(sdb1)
[  418.366423] requeue_io 308: inode 32 size 1236 at 08:11(sdb1)
[  418.366427] requeue_io 308: inode 33 size 655 at 08:11(sdb1)
[  418.366431] requeue_io 308: inode 35 size 330 at 08:11(sdb1)
[  418.366435] requeue_io 308: inode 36 size 248 at 08:11(sdb1)
[  418.366439] requeue_io 308: inode 37 size 683 at 08:11(sdb1)
[  418.366443] requeue_io 308: inode 38 size 1451 at 08:11(sdb1)
[  418.366446] requeue_io 308: inode 39 size 894 at 08:11(sdb1)
[  418.366450] requeue_io 308: inode 40 size 879 at 08:11(sdb1)
[  418.366453] requeue_io 308: inode 42 size 797 at 08:11(sdb1)
[  418.366457] requeue_io 308: inode 43 size 1314 at 08:11(sdb1)
[  418.366460] requeue_io 308: inode 44 size 1463 at 08:11(sdb1)
[  418.366464] requeue_io 308: inode 45 size 3032 at 08:11(sdb1)
[  418.366468] requeue_io 308: inode 46 size 325 at 08:11(sdb1)
[  418.366471] requeue_io 308: inode 47 size 583 at 08:11(sdb1)
[  418.366475] requeue_io 308: inode 48 size 1660 at 08:11(sdb1)
[  418.366478] requeue_io 308: inode 49 size 3159 at 08:11(sdb1)
[  418.366482] requeue_io 308: inode 50 size 510 at 08:11(sdb1)
[  418.366485] requeue_io 308: inode 51 size 100 at 08:11(sdb1)
[  418.366489] requeue_io 308: inode 52 size 143 at 08:11(sdb1)
[  418.366492] requeue_io 308: inode 53 size 954 at 08:11(sdb1)
[  418.366496] requeue_io 308: inode 54 size 322 at 08:11(sdb1)
[  418.366500] requeue_io 308: inode 55 size 970 at 08:11(sdb1)
[  418.366503] requeue_io 308: inode 57 size 483 at 08:11(sdb1)
[  418.366507] requeue_io 308: inode 58 size 1125 at 08:11(sdb1)
[  418.366511] requeue_io 308: inode 59 size 2196 at 08:11(sdb1)
[  418.366514] requeue_io 308: inode 60 size 104 at 08:11(sdb1)
[  418.366518] requeue_io 308: inode 61 size 488 at 08:11(sdb1)
[  418.366522] requeue_io 308: inode 62 size 116 at 08:11(sdb1)
[  418.366525] requeue_io 308: inode 63 size 907 at 08:11(sdb1)
[  418.366529] requeue_io 308: inode 64 size 1076 at 08:11(sdb1)
[  418.366532] requeue_io 308: inode 65 size 460 at 08:11(sdb1)
[  418.366536] requeue_io 308: inode 66 size 1092 at 08:11(sdb1)
[  418.366539] requeue_io 308: inode 67 size 424 at 08:11(sdb1)
[  418.366543] requeue_io 308: inode 68 size 696 at 08:11(sdb1)
[  418.366546] requeue_io 308: inode 70 size 137 at 08:11(sdb1)
[  418.366550] requeue_io 308: inode 71 size 201 at 08:11(sdb1)
[  418.366553] requeue_io 308: inode 72 size 150 at 08:11(sdb1)
[  418.366557] requeue_io 308: inode 73 size 188 at 08:11(sdb1)
[  418.366561] requeue_io 308: inode 75 size 1208 at 08:11(sdb1)
[  418.366564] requeue_io 308: inode 76 size 493 at 08:11(sdb1)
[  418.366567] requeue_io 308: inode 77 size 484 at 08:11(sdb1)
[  418.366571] requeue_io 308: inode 78 size 356 at 08:11(sdb1)
[  418.366575] requeue_io 308: inode 79 size 895 at 08:11(sdb1)
[  418.366578] requeue_io 308: inode 80 size 847 at 08:11(sdb1)
[  418.366582] requeue_io 308: inode 81 size 3281 at 08:11(sdb1)
[  418.366586] requeue_io 308: inode 82 size 3329 at 08:11(sdb1)
[  418.366590] requeue_io 308: inode 83 size 115 at 08:11(sdb1)
[  418.366593] requeue_io 308: inode 84 size 644 at 08:11(sdb1)
[  418.366597] requeue_io 308: inode 85 size 125 at 08:11(sdb1)
[  418.366600] requeue_io 308: inode 86 size 199 at 08:11(sdb1)
[  418.366604] requeue_io 308: inode 87 size 204 at 08:11(sdb1)
[  418.366607] requeue_io 308: inode 88 size 72 at 08:11(sdb1)
[  418.366622] mm/page-writeback.c 658 wb_kupdate: pdflush(209) 17174 global 2012 0 0 wc _M tw 1024 sk 0


^ permalink raw reply	[flat|nested] 61+ messages in thread

* [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file
       [not found]                 ` <393126119.26275@ustc.edu.cn>
@ 2007-10-23  7:55                   ` Fengguang Wu
  2007-10-23 10:07                   ` Peter Zijlstra
  2007-10-23 10:17                   ` Maxim Levitsky
  2 siblings, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-10-23  7:55 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: Peter Zijlstra, linux-kernel, Fengguang Wu, Andrew Morton

This is not a new problem in 2.6.23-git17.
2.6.22/2.6.23 is buggy in the same way.

Reiserfs could leave newly created sub-page-size files in dirty state
for ever.  They cannot be synced to disk by pdflush routines or
explicit `sync' commands.  Only `umount' can do the trick.

The direct cause is: the dirty page's PG_dirty is wrongly _cleared_.
Call trace:
	 [<ffffffff8027e920>] cancel_dirty_page+0xd0/0xf0
	 [<ffffffff8816d470>] :reiserfs:reiserfs_cut_from_item+0x660/0x710
	 [<ffffffff8816d791>] :reiserfs:reiserfs_do_truncate+0x271/0x530
	 [<ffffffff8815872d>] :reiserfs:reiserfs_truncate_file+0xfd/0x3b0
	 [<ffffffff8815d3d0>] :reiserfs:reiserfs_file_release+0x1e0/0x340
	 [<ffffffff802a187c>] __fput+0xcc/0x1b0
	 [<ffffffff802a1ba6>] fput+0x16/0x20
	 [<ffffffff8029e676>] filp_close+0x56/0x90
	 [<ffffffff8029fe0d>] sys_close+0xad/0x110
	 [<ffffffff8020c41e>] system_call+0x7e/0x83

Fix the bug by removing the cancel_dirty_page() call. Tests show that
it causes no bad behaviors on various write sizes.


=== for the patient ===
Here are more detailed demonstrations of the problem.

1) the page has both PG_dirty(D)/PAGECACHE_TAG_DIRTY(d) after being written to;
   and then only PAGECACHE_TAG_DIRTY(d) remains after the file is closed.

------------------------------ screen 0 ------------------------------
[T0] root /home/wfg# cat > /test/tiny
[T1] hi
[T2] root /home/wfg#

------------------------------ screen 1 ------------------------------
[T1] root /home/wfg# echo /test/tiny > /proc/filecache
[T1] root /home/wfg# cat /proc/filecache
     # file /test/tiny
     # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback O:owner B:buffer d:dirty w:writeback
     # idx   len     state   refcnt
     0       1       ___UD__Bd_      2
[T2] root /home/wfg# cat /proc/filecache
     # file /test/tiny
     # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback O:owner B:buffer d:dirty w:writeback
     # idx   len     state   refcnt
     0       1       ___U___Bd_      2

2) note the non-zero 'cancelled_write_bytes' after /tmp/hi is copied.

------------------------------ screen 0 ------------------------------
[T0] root /home/wfg# echo hi > /tmp/hi
[T1] root /home/wfg# cp /tmp/hi /dev/stdin /test
[T2] hi
[T3] root /home/wfg#

------------------------------ screen 1 ------------------------------
[T1] root /proc/4397# cd /proc/`pidof cp`
[T1] root /proc/4713# cat io
     rchar: 8396
     wchar: 3
     syscr: 20
     syscw: 1
     read_bytes: 0
     write_bytes: 20480
     cancelled_write_bytes: 4096
[T2] root /proc/4713# cat io
     rchar: 8399
     wchar: 6
     syscr: 21
     syscw: 2
     read_bytes: 0
     write_bytes: 24576
     cancelled_write_bytes: 4096

//Question: the 'write_bytes' is a bit more than expected ;-)

Cc: Maxim Levitsky <maximlevitsky@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
---
 fs/reiserfs/stree.c |    3 ---
 1 file changed, 3 deletions(-)

--- linux-2.6.24-git17.orig/fs/reiserfs/stree.c
+++ linux-2.6.24-git17/fs/reiserfs/stree.c
@@ -1458,9 +1458,6 @@ static void unmap_buffers(struct page *p
 				}
 				bh = next;
 			} while (bh != head);
-			if (PAGE_SIZE == bh->b_size) {
-				cancel_dirty_page(page, PAGE_CACHE_SIZE);
-			}
 		}
 	}
 }


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file
       [not found]                 ` <393126119.26275@ustc.edu.cn>
  2007-10-23  7:55                   ` [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file Fengguang Wu
@ 2007-10-23 10:07                   ` Peter Zijlstra
       [not found]                     ` <393140585.27414@ustc.edu.cn>
  2007-10-23 10:17                   ` Maxim Levitsky
  2 siblings, 1 reply; 61+ messages in thread
From: Peter Zijlstra @ 2007-10-23 10:07 UTC (permalink / raw)
  To: Fengguang Wu
  Cc: Maxim Levitsky, linux-kernel, Fengguang Wu, Andrew Morton,
	Jeff Mahoney, reiserfs-dev

[ adding reiserfs devs to the CC ]

On Tue, 2007-10-23 at 15:55 +0800, Fengguang Wu wrote:
> This is not a new problem in 2.6.23-git17.
> 2.6.22/2.6.23 is buggy in the same way.
> 
> Reiserfs could leave newly created sub-page-size files in dirty state
> for ever.  They cannot be synced to disk by pdflush routines or
> explicit `sync' commands.  Only `umount' can do the trick.
> 
> The direct cause is: the dirty page's PG_dirty is wrongly _cleared_.
> Call trace:
> 	 [<ffffffff8027e920>] cancel_dirty_page+0xd0/0xf0
> 	 [<ffffffff8816d470>] :reiserfs:reiserfs_cut_from_item+0x660/0x710
> 	 [<ffffffff8816d791>] :reiserfs:reiserfs_do_truncate+0x271/0x530
> 	 [<ffffffff8815872d>] :reiserfs:reiserfs_truncate_file+0xfd/0x3b0
> 	 [<ffffffff8815d3d0>] :reiserfs:reiserfs_file_release+0x1e0/0x340
> 	 [<ffffffff802a187c>] __fput+0xcc/0x1b0
> 	 [<ffffffff802a1ba6>] fput+0x16/0x20
> 	 [<ffffffff8029e676>] filp_close+0x56/0x90
> 	 [<ffffffff8029fe0d>] sys_close+0xad/0x110
> 	 [<ffffffff8020c41e>] system_call+0x7e/0x83
> 
> Fix the bug by removing the cancel_dirty_page() call. Tests show that
> it causes no bad behaviors on various write sizes.
> 
> 
> === for the patient ===
> Here are more detailed demonstrations of the problem.
> 
> 1) the page has both PG_dirty(D)/PAGECACHE_TAG_DIRTY(d) after being written to;
>    and then only PAGECACHE_TAG_DIRTY(d) remains after the file is closed.
> 
> ------------------------------ screen 0 ------------------------------
> [T0] root /home/wfg# cat > /test/tiny
> [T1] hi
> [T2] root /home/wfg#
> 
> ------------------------------ screen 1 ------------------------------
> [T1] root /home/wfg# echo /test/tiny > /proc/filecache
> [T1] root /home/wfg# cat /proc/filecache
>      # file /test/tiny
>      # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback O:owner B:buffer d:dirty w:writeback
>      # idx   len     state   refcnt
>      0       1       ___UD__Bd_      2
> [T2] root /home/wfg# cat /proc/filecache
>      # file /test/tiny
>      # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback O:owner B:buffer d:dirty w:writeback
>      # idx   len     state   refcnt
>      0       1       ___U___Bd_      2
> 
> 2) note the non-zero 'cancelled_write_bytes' after /tmp/hi is copied.
> 
> ------------------------------ screen 0 ------------------------------
> [T0] root /home/wfg# echo hi > /tmp/hi
> [T1] root /home/wfg# cp /tmp/hi /dev/stdin /test
> [T2] hi
> [T3] root /home/wfg#
> 
> ------------------------------ screen 1 ------------------------------
> [T1] root /proc/4397# cd /proc/`pidof cp`
> [T1] root /proc/4713# cat io
>      rchar: 8396
>      wchar: 3
>      syscr: 20
>      syscw: 1
>      read_bytes: 0
>      write_bytes: 20480
>      cancelled_write_bytes: 4096
> [T2] root /proc/4713# cat io
>      rchar: 8399
>      wchar: 6
>      syscr: 21
>      syscw: 2
>      read_bytes: 0
>      write_bytes: 24576
>      cancelled_write_bytes: 4096
> 
> //Question: the 'write_bytes' is a bit more than expected ;-)
> 
> Cc: Maxim Levitsky <maximlevitsky@gmail.com>                                                                           
> Cc: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
> ---
>  fs/reiserfs/stree.c |    3 ---
>  1 file changed, 3 deletions(-)
> 
> --- linux-2.6.24-git17.orig/fs/reiserfs/stree.c
> +++ linux-2.6.24-git17/fs/reiserfs/stree.c
> @@ -1458,9 +1458,6 @@ static void unmap_buffers(struct page *p
>  				}
>  				bh = next;
>  			} while (bh != head);
> -			if (PAGE_SIZE == bh->b_size) {
> -				cancel_dirty_page(page, PAGE_CACHE_SIZE);
> -			}
>  		}
>  	}
>  }
> 


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file
       [not found]                 ` <393126119.26275@ustc.edu.cn>
  2007-10-23  7:55                   ` [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file Fengguang Wu
  2007-10-23 10:07                   ` Peter Zijlstra
@ 2007-10-23 10:17                   ` Maxim Levitsky
       [not found]                     ` <393150504.32739@ustc.edu.cn>
  2 siblings, 1 reply; 61+ messages in thread
From: Maxim Levitsky @ 2007-10-23 10:17 UTC (permalink / raw)
  To: Fengguang Wu; +Cc: Peter Zijlstra, linux-kernel, Fengguang Wu, Andrew Morton

On Tuesday 23 October 2007 09:55:14 Fengguang Wu wrote:
> This is not a new problem in 2.6.23-git17.
> 2.6.22/2.6.23 is buggy in the same way.
> 
> Reiserfs could leave newly created sub-page-size files in dirty state
> for ever.  They cannot be synced to disk by pdflush routines or
> explicit `sync' commands.  Only `umount' can do the trick.
> 
> The direct cause is: the dirty page's PG_dirty is wrongly _cleared_.
> Call trace:
> 	 [<ffffffff8027e920>] cancel_dirty_page+0xd0/0xf0
> 	 [<ffffffff8816d470>] :reiserfs:reiserfs_cut_from_item+0x660/0x710
> 	 [<ffffffff8816d791>] :reiserfs:reiserfs_do_truncate+0x271/0x530
> 	 [<ffffffff8815872d>] :reiserfs:reiserfs_truncate_file+0xfd/0x3b0
> 	 [<ffffffff8815d3d0>] :reiserfs:reiserfs_file_release+0x1e0/0x340
> 	 [<ffffffff802a187c>] __fput+0xcc/0x1b0
> 	 [<ffffffff802a1ba6>] fput+0x16/0x20
> 	 [<ffffffff8029e676>] filp_close+0x56/0x90
> 	 [<ffffffff8029fe0d>] sys_close+0xad/0x110
> 	 [<ffffffff8020c41e>] system_call+0x7e/0x83
> 
> Fix the bug by removing the cancel_dirty_page() call. Tests show that
> it causes no bad behaviors on various write sizes.
> 
> 
> === for the patient ===
> Here are more detailed demonstrations of the problem.
> 
> 1) the page has both PG_dirty(D)/PAGECACHE_TAG_DIRTY(d) after being written to;
>    and then only PAGECACHE_TAG_DIRTY(d) remains after the file is closed.
> 
> ------------------------------ screen 0 ------------------------------
> [T0] root /home/wfg# cat > /test/tiny
> [T1] hi
> [T2] root /home/wfg#
> 
> ------------------------------ screen 1 ------------------------------
> [T1] root /home/wfg# echo /test/tiny > /proc/filecache
> [T1] root /home/wfg# cat /proc/filecache
>      # file /test/tiny
>      # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback O:owner B:buffer d:dirty w:writeback
>      # idx   len     state   refcnt
>      0       1       ___UD__Bd_      2
> [T2] root /home/wfg# cat /proc/filecache
>      # file /test/tiny
>      # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback O:owner B:buffer d:dirty w:writeback
>      # idx   len     state   refcnt
>      0       1       ___U___Bd_      2
> 
> 2) note the non-zero 'cancelled_write_bytes' after /tmp/hi is copied.
> 
> ------------------------------ screen 0 ------------------------------
> [T0] root /home/wfg# echo hi > /tmp/hi
> [T1] root /home/wfg# cp /tmp/hi /dev/stdin /test
> [T2] hi
> [T3] root /home/wfg#
> 
> ------------------------------ screen 1 ------------------------------
> [T1] root /proc/4397# cd /proc/`pidof cp`
> [T1] root /proc/4713# cat io
>      rchar: 8396
>      wchar: 3
>      syscr: 20
>      syscw: 1
>      read_bytes: 0
>      write_bytes: 20480
>      cancelled_write_bytes: 4096
> [T2] root /proc/4713# cat io
>      rchar: 8399
>      wchar: 6
>      syscr: 21
>      syscw: 2
>      read_bytes: 0
>      write_bytes: 24576
>      cancelled_write_bytes: 4096
> 
> //Question: the 'write_bytes' is a bit more than expected ;-)
> 
> Cc: Maxim Levitsky <maximlevitsky@gmail.com>                                                                           
> Cc: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
> ---
>  fs/reiserfs/stree.c |    3 ---
>  1 file changed, 3 deletions(-)
> 
> --- linux-2.6.24-git17.orig/fs/reiserfs/stree.c
> +++ linux-2.6.24-git17/fs/reiserfs/stree.c
> @@ -1458,9 +1458,6 @@ static void unmap_buffers(struct page *p
>  				}
>  				bh = next;
>  			} while (bh != head);
> -			if (PAGE_SIZE == bh->b_size) {
> -				cancel_dirty_page(page, PAGE_CACHE_SIZE);
> -			}
>  		}
>  	}
>  }
> 
> 

One thing to say... Works perfectly!
Big thanks for fixing that bug.


Best regards,
	Maxim Levitsky

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file
       [not found]                     ` <393140585.27414@ustc.edu.cn>
@ 2007-10-23 11:56                       ` Fengguang Wu
  2007-10-23 14:10                       ` Chris Mason
  1 sibling, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-10-23 11:56 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Maxim Levitsky, linux-kernel, Andrew Morton, Jeff Mahoney,
	reiserfs-dev, linux-fsdevel

On Tue, Oct 23, 2007 at 12:07:07PM +0200, Peter Zijlstra wrote:
> [ adding reiserfs devs to the CC ]

Thank you.

This fix is kind of crude - even though it fixed Maxim's problem, and
survived my stress testing of a lot of patching and kernel compiling.
I'd be glad to see better solutions.

Fengguang
---

reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file

This is not a new problem in 2.6.23-git17.
2.6.22/2.6.23 is buggy in the same way.

Reiserfs could accumulate dirty sub-page-size files until umount time.
They cannot be synced to disk by pdflush routines or explicit `sync'
commands.  Only `umount' can do the trick.

The direct cause is: the dirty page's PG_dirty is wrongly _cleared_.
Call trace:
	 [<ffffffff8027e920>] cancel_dirty_page+0xd0/0xf0
	 [<ffffffff8816d470>] :reiserfs:reiserfs_cut_from_item+0x660/0x710
	 [<ffffffff8816d791>] :reiserfs:reiserfs_do_truncate+0x271/0x530
	 [<ffffffff8815872d>] :reiserfs:reiserfs_truncate_file+0xfd/0x3b0
	 [<ffffffff8815d3d0>] :reiserfs:reiserfs_file_release+0x1e0/0x340
	 [<ffffffff802a187c>] __fput+0xcc/0x1b0
	 [<ffffffff802a1ba6>] fput+0x16/0x20
	 [<ffffffff8029e676>] filp_close+0x56/0x90
	 [<ffffffff8029fe0d>] sys_close+0xad/0x110
	 [<ffffffff8020c41e>] system_call+0x7e/0x83

Fix the bug by removing the cancel_dirty_page() call. Tests show that
it causes no bad behaviors on various write sizes.


=== for the patient ===
Here are more detailed demonstrations of the problem.

1) the page has both PG_dirty(D)/PAGECACHE_TAG_DIRTY(d) after being written to;
   and then only PAGECACHE_TAG_DIRTY(d) remains after the file is closed.

------------------------------ screen 0 ------------------------------
[T0] root /home/wfg# cat > /test/tiny
[T1] hi
[T2] root /home/wfg#

------------------------------ screen 1 ------------------------------
[T1] root /home/wfg# echo /test/tiny > /proc/filecache
[T1] root /home/wfg# cat /proc/filecache
     # file /test/tiny
     # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback O:owner B:buffer d:dirty w:writeback
     # idx   len     state   refcnt
     0       1       ___UD__Bd_      2
[T2] root /home/wfg# cat /proc/filecache
     # file /test/tiny
     # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback O:owner B:buffer d:dirty w:writeback
     # idx   len     state   refcnt
     0       1       ___U___Bd_      2

2) note the non-zero 'cancelled_write_bytes' after /tmp/hi is copied.

------------------------------ screen 0 ------------------------------
[T0] root /home/wfg# echo hi > /tmp/hi
[T1] root /home/wfg# cp /tmp/hi /dev/stdin /test
[T2] hi
[T3] root /home/wfg#

------------------------------ screen 1 ------------------------------
[T1] root /proc/4397# cd /proc/`pidof cp`
[T1] root /proc/4713# cat io
     rchar: 8396
     wchar: 3
     syscr: 20
     syscw: 1
     read_bytes: 0
     write_bytes: 20480
     cancelled_write_bytes: 4096
[T2] root /proc/4713# cat io
     rchar: 8399
     wchar: 6
     syscr: 21
     syscw: 2
     read_bytes: 0
     write_bytes: 24576
     cancelled_write_bytes: 4096

//Question: the 'write_bytes' is a bit more than expected ;-)

Cc: Maxim Levitsky <maximlevitsky@gmail.com>                                                                           
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
---
 fs/reiserfs/stree.c |    3 ---
 1 file changed, 3 deletions(-)

--- linux-2.6.24-git17.orig/fs/reiserfs/stree.c
+++ linux-2.6.24-git17/fs/reiserfs/stree.c
@@ -1458,9 +1458,6 @@ static void unmap_buffers(struct page *p
 				}
 				bh = next;
 			} while (bh != head);
-			if (PAGE_SIZE == bh->b_size) {
-				cancel_dirty_page(page, PAGE_CACHE_SIZE);
-			}
 		}
 	}
 }


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file
       [not found]                     ` <393140585.27414@ustc.edu.cn>
  2007-10-23 11:56                       ` Fengguang Wu
@ 2007-10-23 14:10                       ` Chris Mason
       [not found]                         ` <393150419.31806@ustc.edu.cn>
  1 sibling, 1 reply; 61+ messages in thread
From: Chris Mason @ 2007-10-23 14:10 UTC (permalink / raw)
  To: Fengguang Wu
  Cc: Peter Zijlstra, Maxim Levitsky, linux-kernel, Andrew Morton,
	Jeff Mahoney, reiserfs-dev, linux-fsdevel

On Tue, 23 Oct 2007 19:56:20 +0800
Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:

> On Tue, Oct 23, 2007 at 12:07:07PM +0200, Peter Zijlstra wrote:
> > [ adding reiserfs devs to the CC ]
> 
> Thank you.
> 
> This fix is kind of crude - even when it fixed Maxim's problem, and
> survived my stress testing of a lot of patching and kernel compiling.
> I'd be glad to see better solutions.

This should be safe, reiserfs has the buffer heads themselves clean and
the page should get cleaned eventually.  The cancel_dirty_page call was
just an optimization to be VM friendly.

-chris

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file
       [not found]                         ` <393150419.31806@ustc.edu.cn>
@ 2007-10-23 14:40                           ` Fengguang Wu
  0 siblings, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-10-23 14:40 UTC (permalink / raw)
  To: Chris Mason
  Cc: Peter Zijlstra, Maxim Levitsky, linux-kernel, Andrew Morton,
	Jeff Mahoney, reiserfs-dev, linux-fsdevel

On Tue, Oct 23, 2007 at 10:10:53AM -0400, Chris Mason wrote:
> On Tue, 23 Oct 2007 19:56:20 +0800
> Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> 
> > On Tue, Oct 23, 2007 at 12:07:07PM +0200, Peter Zijlstra wrote:
> > > [ adding reiserfs devs to the CC ]
> > 
> > Thank you.
> > 
> > This fix is kind of crude - even when it fixed Maxim's problem, and
> > survived my stress testing of a lot of patching and kernel compiling.
> > I'd be glad to see better solutions.
> 
> This should be safe, reiserfs has the buffer heads themselves clean and
> the page should get cleaned eventually.  The cancel_dirty_page call was
> just an optimization to be VM friendly.
 
> -chris

'chris' as in fs/reiserfs/{inode.c,namei.c}, and now in btrfs/*?

Nice to meet you ;-)

Fengguang


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file
       [not found]                     ` <393150504.32739@ustc.edu.cn>
@ 2007-10-23 14:41                       ` Fengguang Wu
  0 siblings, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-10-23 14:41 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: Peter Zijlstra, linux-kernel, Fengguang Wu, Andrew Morton

On Tue, Oct 23, 2007 at 12:17:51PM +0200, Maxim Levitsky wrote:
> > ---
> >  fs/reiserfs/stree.c |    3 ---
> >  1 file changed, 3 deletions(-)
> > 
> > --- linux-2.6.24-git17.orig/fs/reiserfs/stree.c
> > +++ linux-2.6.24-git17/fs/reiserfs/stree.c
> > @@ -1458,9 +1458,6 @@ static void unmap_buffers(struct page *p
> >  				}
> >  				bh = next;
> >  			} while (bh != head);
> > -			if (PAGE_SIZE == bh->b_size) {
> > -				cancel_dirty_page(page, PAGE_CACHE_SIZE);
> > -			}
> >  		}
> >  	}
> >  }
> > 
> > 
> 
> One thing to say... Works perfectly!
> Big thanks for fixing that bug.

And many thanks for your testing~

Fengguang


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
       [not found]                         ` <393060478.03650@ustc.edu.cn>
  2007-10-22 13:41                           ` Fengguang Wu
@ 2007-10-31 15:22                           ` Torsten Kaiser
       [not found]                             ` <393903856.06449@ustc.edu.cn>
  1 sibling, 1 reply; 61+ messages in thread
From: Torsten Kaiser @ 2007-10-31 15:22 UTC (permalink / raw)
  To: Fengguang Wu; +Cc: Maxim Levitsky, Peter Zijlstra, linux-kernel, Andrew Morton

On 10/22/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> On Mon, Oct 22, 2007 at 09:10:45PM +0800, Fengguang Wu wrote:
> > Hmm, Maybe it's an reiserfs related issue.  Do you have the full log file?
>
> Bingo! It can be reproduced in -mm on reiserfs:
>
> # mkfs.reiserfs /dev/sdb1
> # mount /dev/sdb1 /test
> # cp bin /test
> <wait for a while>
> # dmesg
> [...]
> [  418.346113] requeue_io 308: inode 6 size 302 at 08:11(sdb1)
> [  418.346119] requeue_io 308: inode 7 size 196 at 08:11(sdb1)
> [  418.346125] requeue_io 308: inode 8 size 85 at 08:11(sdb1)

Since 2.6.23-mm1 I also experience strange hangs during heavy writeouts.
Each time I noticed this I was using emerge (package util from the
gentoo distribution) to install/upgrade a package. The last step,
where this hang occurred, is moving the prepared files from a tmpfs
partition to the main xfs filesystem.
The hangs were not fatal, after a few seconds everything resumed
normal, so I was not able to capture a good image of what was
happening.

Today it happened again, but a little more obvious. During the moving
process the writeout stalled completely for several minutes until I hit
SysRq+W.

/proc/meminfo:
MemTotal:      4061808 kB
MemFree:        881332 kB
Buffers:             0 kB
Cached:        2566628 kB
SwapCached:         64 kB
Active:         926612 kB
Inactive:      1959136 kB
SwapTotal:     9775416 kB
SwapFree:      9775296 kB
Dirty:           44948 kB
Writeback:           0 kB
AnonPages:      319068 kB
Mapped:          52844 kB
Slab:           235572 kB
SReclaimable:   164408 kB
SUnreclaim:      71164 kB
PageTables:       9576 kB
NFS_Unstable:        0 kB
Bounce:              0 kB
CommitLimit:  11806320 kB
Committed_AS:   544520 kB
VmallocTotal: 34359738367 kB
VmallocUsed:     35004 kB
VmallocChunk: 34359702447 kB
HugePages_Total:     0
HugePages_Free:      0
HugePages_Rsvd:      0
HugePages_Surp:      0
Hugepagesize:     2048 kB

The 'Dirty' count did not decrease during this time and 'Writeback' stayed at 0.
I also have /proc/pagetypeinfo, but I see nothing interesting in
there. (But will send it, if needed)

The output from SysRq+W:
SysRq : Show Blocked State
  task                        PC stack   pid father
pdflush       D ffff81001fcc2a88     0   285      2
 ffff810005d55580 0000000000000046 0000000000000800 0000007000000001
 0000000000000400 ffffffff8022d61c ffffffff80817b00 ffffffff80817b00
 ffffffff80813f40 ffffffff80817b00 ffff810100893b18 0000000000000000
Call Trace:
 [<ffffffff8022d61c>] task_rq_lock+0x4c/0x90
 [<ffffffff8022c8ea>] __wake_up_common+0x5a/0x90
 [<ffffffff805b14c7>] __down+0xa7/0x11e
 [<ffffffff8022da70>] default_wake_function+0x0/0x10
 [<ffffffff805b1145>] __down_failed+0x35/0x3a
 [<ffffffff803750be>] xfs_buf_lock+0x3e/0x40
 [<ffffffff803771fe>] _xfs_buf_find+0x13e/0x240
 [<ffffffff8037736f>] xfs_buf_get_flags+0x6f/0x190
 [<ffffffff803774a2>] xfs_buf_read_flags+0x12/0xa0
 [<ffffffff80368614>] xfs_trans_read_buf+0x64/0x340
 [<ffffffff80352151>] xfs_itobp+0x81/0x1e0
 [<ffffffff803759de>] xfs_buf_rele+0x2e/0xd0
 [<ffffffff80354afe>] xfs_iflush+0xfe/0x520
 [<ffffffff803ae3b2>] __down_read_trylock+0x42/0x60
 [<ffffffff80355a72>] xfs_inode_item_push+0x12/0x20
 [<ffffffff80368037>] xfs_trans_push_ail+0x267/0x2b0
 [<ffffffff8035a33b>] xlog_ticket_get+0xfb/0x140
 [<ffffffff8035c5ae>] xfs_log_reserve+0xee/0x120
 [<ffffffff803669e8>] xfs_trans_reserve+0xa8/0x210
 [<ffffffff8035703a>] xfs_iomap_write_allocate+0xfa/0x410
 [<ffffffff804ce67a>] __split_bio+0x38a/0x3c0
 [<ffffffff80373657>] xfs_start_page_writeback+0x27/0x60
 [<ffffffff8035660c>] xfs_iomap+0x26c/0x310
 [<ffffffff803735d8>] xfs_map_blocks+0x38/0x90
 [<ffffffff80374a88>] xfs_page_state_convert+0x2b8/0x630
 [<ffffffff80374f5f>] xfs_vm_writepage+0x6f/0x120
 [<ffffffff8026acda>] __writepage+0xa/0x30
 [<ffffffff8026b2ce>] write_cache_pages+0x23e/0x330
 [<ffffffff8026acd0>] __writepage+0x0/0x30
 [<ffffffff80354db7>] xfs_iflush+0x3b7/0x520
 [<ffffffff80375782>] _xfs_buf_ioapply+0x222/0x320
 [<ffffffff803ae451>] __up_read+0x21/0xb0
 [<ffffffff8034f22c>] xfs_iunlock+0x5c/0xc0
 [<ffffffff8026b410>] do_writepages+0x20/0x40
 [<ffffffff802b36a0>] __writeback_single_inode+0xb0/0x380
 [<ffffffff804d052e>] dm_table_any_congested+0x2e/0x80
 [<ffffffff802b3d8d>] generic_sync_sb_inodes+0x20d/0x330
 [<ffffffff802b4322>] writeback_inodes+0xa2/0xe0
 [<ffffffff8026bde6>] wb_kupdate+0xa6/0x120
 [<ffffffff8026c2a0>] pdflush+0x0/0x1e0
 [<ffffffff8026c3b0>] pdflush+0x110/0x1e0
 [<ffffffff8026bd40>] wb_kupdate+0x0/0x120
 [<ffffffff8024a32b>] kthread+0x4b/0x80
 [<ffffffff8020c9d8>] child_rip+0xa/0x12
 [<ffffffff8024a2e0>] kthread+0x0/0x80
 [<ffffffff8020c9ce>] child_rip+0x0/0x12

emerge        D ffff81001fcc2a88     0  3221   8163
 ffff81008c0679f8 0000000000000086 ffff81008c067988 ffffffff8024a719
 ffff8100060fb008 ffffffff8022c8ea ffffffff80817b00 ffffffff80817b00
 ffffffff80813f40 ffffffff80817b00 ffff810100893b18 0000000000000000
Call Trace:
 [<ffffffff8024a719>] autoremove_wake_function+0x9/0x30
 [<ffffffff8022c8ea>] __wake_up_common+0x5a/0x90
 [<ffffffff8022c8ea>] __wake_up_common+0x5a/0x90
 [<ffffffff805b14c7>] __down+0xa7/0x11e
 [<ffffffff8022da70>] default_wake_function+0x0/0x10
 [<ffffffff805b1145>] __down_failed+0x35/0x3a
 [<ffffffff803750be>] xfs_buf_lock+0x3e/0x40
 [<ffffffff803771fe>] _xfs_buf_find+0x13e/0x240
 [<ffffffff8037736f>] xfs_buf_get_flags+0x6f/0x190
 [<ffffffff803774a2>] xfs_buf_read_flags+0x12/0xa0
 [<ffffffff80368614>] xfs_trans_read_buf+0x64/0x340
 [<ffffffff80352151>] xfs_itobp+0x81/0x1e0
 [<ffffffff803759de>] xfs_buf_rele+0x2e/0xd0
 [<ffffffff80354afe>] xfs_iflush+0xfe/0x520
 [<ffffffff803ae3b2>] __down_read_trylock+0x42/0x60
 [<ffffffff80355a72>] xfs_inode_item_push+0x12/0x20
 [<ffffffff80368037>] xfs_trans_push_ail+0x267/0x2b0
 [<ffffffff8035c532>] xfs_log_reserve+0x72/0x120
 [<ffffffff803669e8>] xfs_trans_reserve+0xa8/0x210
 [<ffffffff80372fe2>] kmem_zone_zalloc+0x32/0x50
 [<ffffffff8035242b>] xfs_itruncate_finish+0xfb/0x310
 [<ffffffff8036d8db>] xfs_free_eofblocks+0x23b/0x280
 [<ffffffff80371d83>] xfs_release+0x153/0x200
 [<ffffffff80377e00>] xfs_file_release+0x10/0x20
 [<ffffffff80294041>] __fput+0xb1/0x220
 [<ffffffff80290e94>] filp_close+0x54/0x90
 [<ffffffff802927af>] sys_close+0x9f/0x100
 [<ffffffff8020bbbe>] system_call+0x7e/0x83


After this SysRq+W writeback resumed again. Possible that writing
above into the syslog triggered that.
The source tmpfs is mounted without any special parameters, but the
target xfs filesystem resides on a dm-crypt device that is on top a 3
disk RAID5 md.
During the hang all CPUs were idle.
The system is x86_64 with CONFIG_NO_HZ=y, but was still receiving ~330
interrupts per second because of the bttv driver. (But I was not using
that device at this time.)

I'm willing to test patches or provide more information, but lack
a good testcase to trigger this on demand.

Torsten

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
       [not found]                             ` <393903856.06449@ustc.edu.cn>
@ 2007-11-01  7:57                               ` Fengguang Wu
  2007-11-01 18:20                               ` Torsten Kaiser
  1 sibling, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-11-01  7:57 UTC (permalink / raw)
  To: Torsten Kaiser
  Cc: Maxim Levitsky, Peter Zijlstra, linux-kernel, Andrew Morton

[-- Attachment #1: Type: text/plain, Size: 7552 bytes --]

On Wed, Oct 31, 2007 at 04:22:10PM +0100, Torsten Kaiser wrote:
> On 10/22/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> > On Mon, Oct 22, 2007 at 09:10:45PM +0800, Fengguang Wu wrote:
> > > Hmm, Maybe it's an reiserfs related issue.  Do you have the full log file?
> >
> > Bingo! It can be reproduced in -mm on reiserfs:
> >
> > # mkfs.reiserfs /dev/sdb1
> > # mount /dev/sdb1 /test
> > # cp bin /test
> > <wait for a while>
> > # dmesg
> > [...]
> > [  418.346113] requeue_io 308: inode 6 size 302 at 08:11(sdb1)
> > [  418.346119] requeue_io 308: inode 7 size 196 at 08:11(sdb1)
> > [  418.346125] requeue_io 308: inode 8 size 85 at 08:11(sdb1)
> 
> Since 2.6.23-mm1 I also experience strange hangs during heavy writeouts.
> Each time I noticed this I was using emerge (package util from the
> gentoo distribution) to install/upgrade a package. The last step,
> where this hang occurred, is moving the prepared files from a tmpfs
> partion to the main xfs filesystem.
> The hangs where not fatal, after a few second everything resumed
> normal, so I was not able to capture a good image of what was
> happening.

Thank you for the detailed report.

How severe were the hangs? Only writeouts stalled, all apps stalled, or
cannot type and run new commands?

> Today it happend again, but a little more obvious. During the moving
> process the writeout stalled completly for several minutes until I hit
> SysRq+W.
> 
> /proc/meminfo:
> MemTotal:      4061808 kB
> MemFree:        881332 kB
> Buffers:             0 kB
> Cached:        2566628 kB
> SwapCached:         64 kB
> Active:         926612 kB
> Inactive:      1959136 kB
> SwapTotal:     9775416 kB
> SwapFree:      9775296 kB
> Dirty:           44948 kB
> Writeback:           0 kB
> AnonPages:      319068 kB
> Mapped:          52844 kB
> Slab:           235572 kB
> SReclaimable:   164408 kB
> SUnreclaim:      71164 kB
> PageTables:       9576 kB
> NFS_Unstable:        0 kB
> Bounce:              0 kB
> CommitLimit:  11806320 kB
> Committed_AS:   544520 kB
> VmallocTotal: 34359738367 kB
> VmallocUsed:     35004 kB
> VmallocChunk: 34359702447 kB
> HugePages_Total:     0
> HugePages_Free:      0
> HugePages_Rsvd:      0
> HugePages_Surp:      0
> Hugepagesize:     2048 kB
> 
> The 'Dirty' count did not decrease during this time and 'Writeback' stayed at 0.
> I also have /proc/pagetypeinfo ,but I see nothing interessing in
> there. (But will send it, if needed)
> 
> The output from SysRq+W:
> SysRq : Show Blocked State
>   task                        PC stack   pid father
> pdflush       D ffff81001fcc2a88     0   285      2
>  ffff810005d55580 0000000000000046 0000000000000800 0000007000000001
>  0000000000000400 ffffffff8022d61c ffffffff80817b00 ffffffff80817b00
>  ffffffff80813f40 ffffffff80817b00 ffff810100893b18 0000000000000000
> Call Trace:
>  [<ffffffff8022d61c>] task_rq_lock+0x4c/0x90
>  [<ffffffff8022c8ea>] __wake_up_common+0x5a/0x90
>  [<ffffffff805b14c7>] __down+0xa7/0x11e
>  [<ffffffff8022da70>] default_wake_function+0x0/0x10
>  [<ffffffff805b1145>] __down_failed+0x35/0x3a
>  [<ffffffff803750be>] xfs_buf_lock+0x3e/0x40
>  [<ffffffff803771fe>] _xfs_buf_find+0x13e/0x240
>  [<ffffffff8037736f>] xfs_buf_get_flags+0x6f/0x190
>  [<ffffffff803774a2>] xfs_buf_read_flags+0x12/0xa0
>  [<ffffffff80368614>] xfs_trans_read_buf+0x64/0x340
>  [<ffffffff80352151>] xfs_itobp+0x81/0x1e0
>  [<ffffffff803759de>] xfs_buf_rele+0x2e/0xd0
>  [<ffffffff80354afe>] xfs_iflush+0xfe/0x520
>  [<ffffffff803ae3b2>] __down_read_trylock+0x42/0x60
>  [<ffffffff80355a72>] xfs_inode_item_push+0x12/0x20
>  [<ffffffff80368037>] xfs_trans_push_ail+0x267/0x2b0
>  [<ffffffff8035a33b>] xlog_ticket_get+0xfb/0x140
>  [<ffffffff8035c5ae>] xfs_log_reserve+0xee/0x120
>  [<ffffffff803669e8>] xfs_trans_reserve+0xa8/0x210
>  [<ffffffff8035703a>] xfs_iomap_write_allocate+0xfa/0x410
>  [<ffffffff804ce67a>] __split_bio+0x38a/0x3c0
>  [<ffffffff80373657>] xfs_start_page_writeback+0x27/0x60
>  [<ffffffff8035660c>] xfs_iomap+0x26c/0x310
>  [<ffffffff803735d8>] xfs_map_blocks+0x38/0x90
>  [<ffffffff80374a88>] xfs_page_state_convert+0x2b8/0x630
>  [<ffffffff80374f5f>] xfs_vm_writepage+0x6f/0x120
>  [<ffffffff8026acda>] __writepage+0xa/0x30
>  [<ffffffff8026b2ce>] write_cache_pages+0x23e/0x330
>  [<ffffffff8026acd0>] __writepage+0x0/0x30
>  [<ffffffff80354db7>] xfs_iflush+0x3b7/0x520
>  [<ffffffff80375782>] _xfs_buf_ioapply+0x222/0x320
>  [<ffffffff803ae451>] __up_read+0x21/0xb0
>  [<ffffffff8034f22c>] xfs_iunlock+0x5c/0xc0
>  [<ffffffff8026b410>] do_writepages+0x20/0x40
>  [<ffffffff802b36a0>] __writeback_single_inode+0xb0/0x380
>  [<ffffffff804d052e>] dm_table_any_congested+0x2e/0x80
>  [<ffffffff802b3d8d>] generic_sync_sb_inodes+0x20d/0x330
>  [<ffffffff802b4322>] writeback_inodes+0xa2/0xe0
>  [<ffffffff8026bde6>] wb_kupdate+0xa6/0x120
>  [<ffffffff8026c2a0>] pdflush+0x0/0x1e0
>  [<ffffffff8026c3b0>] pdflush+0x110/0x1e0
>  [<ffffffff8026bd40>] wb_kupdate+0x0/0x120
>  [<ffffffff8024a32b>] kthread+0x4b/0x80
>  [<ffffffff8020c9d8>] child_rip+0xa/0x12
>  [<ffffffff8024a2e0>] kthread+0x0/0x80
>  [<ffffffff8020c9ce>] child_rip+0x0/0x12
> 
> emerge        D ffff81001fcc2a88     0  3221   8163
>  ffff81008c0679f8 0000000000000086 ffff81008c067988 ffffffff8024a719
>  ffff8100060fb008 ffffffff8022c8ea ffffffff80817b00 ffffffff80817b00
>  ffffffff80813f40 ffffffff80817b00 ffff810100893b18 0000000000000000
> Call Trace:
>  [<ffffffff8024a719>] autoremove_wake_function+0x9/0x30
>  [<ffffffff8022c8ea>] __wake_up_common+0x5a/0x90
>  [<ffffffff8022c8ea>] __wake_up_common+0x5a/0x90
>  [<ffffffff805b14c7>] __down+0xa7/0x11e
>  [<ffffffff8022da70>] default_wake_function+0x0/0x10
>  [<ffffffff805b1145>] __down_failed+0x35/0x3a
>  [<ffffffff803750be>] xfs_buf_lock+0x3e/0x40
>  [<ffffffff803771fe>] _xfs_buf_find+0x13e/0x240
>  [<ffffffff8037736f>] xfs_buf_get_flags+0x6f/0x190
>  [<ffffffff803774a2>] xfs_buf_read_flags+0x12/0xa0
>  [<ffffffff80368614>] xfs_trans_read_buf+0x64/0x340
>  [<ffffffff80352151>] xfs_itobp+0x81/0x1e0
>  [<ffffffff803759de>] xfs_buf_rele+0x2e/0xd0
>  [<ffffffff80354afe>] xfs_iflush+0xfe/0x520
>  [<ffffffff803ae3b2>] __down_read_trylock+0x42/0x60
>  [<ffffffff80355a72>] xfs_inode_item_push+0x12/0x20
>  [<ffffffff80368037>] xfs_trans_push_ail+0x267/0x2b0
>  [<ffffffff8035c532>] xfs_log_reserve+0x72/0x120
>  [<ffffffff803669e8>] xfs_trans_reserve+0xa8/0x210
>  [<ffffffff80372fe2>] kmem_zone_zalloc+0x32/0x50
>  [<ffffffff8035242b>] xfs_itruncate_finish+0xfb/0x310
>  [<ffffffff8036d8db>] xfs_free_eofblocks+0x23b/0x280
>  [<ffffffff80371d83>] xfs_release+0x153/0x200
>  [<ffffffff80377e00>] xfs_file_release+0x10/0x20
>  [<ffffffff80294041>] __fput+0xb1/0x220
>  [<ffffffff80290e94>] filp_close+0x54/0x90
>  [<ffffffff802927af>] sys_close+0x9f/0x100
>  [<ffffffff8020bbbe>] system_call+0x7e/0x83
> 
> 
> After this SysRq+W writeback resumed again. Possible that writing
> above into the syslog triggered that.

Maybe. Are the log files on another disk/partition?

> The source tmpfs is mounted with any special parameters, but the
> target xfs filesystem resides on a dm-crypt device that is on top a 3
> disk RAID5 md.
> During the hang all CPUs where idle.

No iowaits? ;-)

> The system is x86_64 with CONFIG_NO_HZ=y, but was still receiving ~330
> interrupts per second because of the bttv driver. (But I was not using
> that device at this time.)
> 
> I'm willing to test patches or more provide more information, but lack
> a good testcase to trigger this on demand.

Thank you. Maybe we can start with the attached debug patch :-)

Fengguang

[-- Attachment #2: writeback-debug.patch --]
[-- Type: text/x-diff, Size: 1926 bytes --]

---
 mm/page-writeback.c |   23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

--- linux-2.6.23-rc8-mm2.orig/mm/page-writeback.c
+++ linux-2.6.23-rc8-mm2/mm/page-writeback.c
@@ -98,6 +98,26 @@ EXPORT_SYMBOL(laptop_mode);
 
 /* End of sysctl-exported parameters */
 
+#define writeback_debug_report(n, wbc) do {                               \
+	__writeback_debug_report(n, wbc, __FILE__, __LINE__, __FUNCTION__); \
+} while (0)
+
+void __writeback_debug_report(long n, struct writeback_control *wbc,
+		const char *file, int line, const char *func)
+{
+	printk("%s %d %s: %s(%d) %ld "
+			"global %lu %lu %lu "
+			"wc %c%c tw %ld sk %ld\n",
+			file, line, func,
+			current->comm, current->pid, n,
+			global_page_state(NR_FILE_DIRTY),
+			global_page_state(NR_WRITEBACK),
+			global_page_state(NR_UNSTABLE_NFS),
+			wbc->encountered_congestion ? 'C':'_',
+			wbc->more_io ? 'M':'_',
+			wbc->nr_to_write,
+			wbc->pages_skipped);
+}
 
 static void background_writeout(unsigned long _min_pages);
 
@@ -404,6 +424,7 @@ static void balance_dirty_pages(struct a
 			pages_written += write_chunk - wbc.nr_to_write;
 			get_dirty_limits(&background_thresh, &dirty_thresh,
 				       &bdi_thresh, bdi);
+			writeback_debug_report(pages_written, &wbc);
 		}
 
 		/*
@@ -568,6 +589,7 @@ static void background_writeout(unsigned
 		wbc.pages_skipped = 0;
 		writeback_inodes(&wbc);
 		min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+		writeback_debug_report(min_pages, &wbc);
 		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
 			/* Wrote less than expected */
 			if (wbc.encountered_congestion)
@@ -643,6 +665,7 @@ static void wb_kupdate(unsigned long arg
 		wbc.encountered_congestion = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		writeback_inodes(&wbc);
+		writeback_debug_report(nr_to_write, &wbc);
 		if (wbc.nr_to_write > 0) {
 			if (wbc.encountered_congestion)
 				congestion_wait(WRITE, HZ/10);

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
       [not found]                             ` <393903856.06449@ustc.edu.cn>
  2007-11-01  7:57                               ` Fengguang Wu
@ 2007-11-01 18:20                               ` Torsten Kaiser
  2007-11-01 19:00                                 ` Torsten Kaiser
       [not found]                                 ` <393968464.13148@ustc.edu.cn>
  1 sibling, 2 replies; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-01 18:20 UTC (permalink / raw)
  To: Fengguang Wu; +Cc: Maxim Levitsky, Peter Zijlstra, linux-kernel, Andrew Morton

On 11/1/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> On Wed, Oct 31, 2007 at 04:22:10PM +0100, Torsten Kaiser wrote:
> > Since 2.6.23-mm1 I also experience strange hangs during heavy writeouts.
> > Each time I noticed this I was using emerge (package util from the
> > gentoo distribution) to install/upgrade a package. The last step,
> > where this hang occurred, is moving the prepared files from a tmpfs
> > partion to the main xfs filesystem.
> > The hangs where not fatal, after a few second everything resumed
> > normal, so I was not able to capture a good image of what was
> > happening.
>
> Thank you for the detailed report.
>
> How severe was the hangs? Only writeouts stalled, all apps stalled, or
> cannot type and run new commands?

Only writeout stalled. The emerge that was moving the files hung, but
everything else worked normally.
I was able to run new commands, like copying /proc/meminfo.

[snip]
> > After this SysRq+W writeback resumed again. Possible that writing
> > above into the syslog triggered that.
>
> Maybe. Are the log files on another disk/partition?

No, everything was going to /

What might be interesting is that doing cat /proc/meminfo
>~/stall/meminfo did not resume the writeback. So there might be some
threshold that was only crossed with the additional write from
syslog-ng. Or syslog-ng does some flushing, I don't know. (I'm using the
syslog-ng package from gentoo:
http://www.balabit.com/products/syslog_ng/ , version 2.0.5)

> > The source tmpfs is mounted with any special parameters, but the
> > target xfs filesystem resides on a dm-crypt device that is on top a 3
> > disk RAID5 md.
> > During the hang all CPUs where idle.
>
> No iowaits? ;-)

No, I have a KSysGuard in my taskbar that showed no activity at all.

OK, the subject does not match for my case, but there was also a tmpfs
involved. And I found no thread with stalls on xfs. :-)

> > The system is x86_64 with CONFIG_NO_HZ=y, but was still receiving ~330
> > interrupts per second because of the bttv driver. (But I was not using
> > that device at this time.)
> >
> > I'm willing to test patches or more provide more information, but lack
> > a good testcase to trigger this on demand.
>
> Thank you. Maybe we can start by the applied debug patch :-)

Will apply it and try to recreate this.

Thanks for looking into it.

Torsten

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: 100% iowait on one of cpus in current -git
  2007-11-01 18:20                               ` Torsten Kaiser
@ 2007-11-01 19:00                                 ` Torsten Kaiser
       [not found]                                   ` <393970108.15915@ustc.edu.cn>
       [not found]                                 ` <393968464.13148@ustc.edu.cn>
  1 sibling, 1 reply; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-01 19:00 UTC (permalink / raw)
  To: Fengguang Wu; +Cc: Maxim Levitsky, Peter Zijlstra, linux-kernel, Andrew Morton

On 11/1/07, Torsten Kaiser <just.for.lkml@googlemail.com> wrote:
> On 11/1/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> > Thank you. Maybe we can start by the applied debug patch :-)
>
> Will applied it and try to recreate this.

Patch applied, used emerge to install a 2.6.24-rc1 kernel.

I had no complete stalls, but three times during the move from tmpfs
to the main xfs the emerge got noticeably slower. There still was
writeout happening, but as emerge prints out every file it has written,
I could see that during the pause not one file was processed.

vmstat 10:
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 0  1      0 3146424    332 614768    0    0   134  1849  438 2515  3  4 91  2
 0  0      0 3146644    332 614784    0    0     2  1628  507  646  0  2 85 13
 0  0      0 3146868    332 614868    0    0     5  2359  527 1076  0  3 97  0
 1  0      0 3144372    332 616148    0    0    96  2829  607 2666  2  5 92  0
-> normal writeout
 0  0      0 3140560    332 618144    0    0   152  2764  633 3308  3  6 91  0
 0  0      0 3137332    332 619908    0    0   114  1801  588 2858  3  4 93  0
 0  0      0 3136912    332 620136    0    0    20   827  393 1605  1  2 98  0
-> first stall
 0  0      0 3137088    332 620136    0    0     0   557  339 1437  0  1 99  0
 0  0      0 3137160    332 620136    0    0     0   642  310 1400  0  1 99  0
 0  0      0 3136588    332 620172    0    0     6  2972  527 1195  0  3 80 16
 0  0      0 3136276    332 620348    0    0    10  2668  558 1195  0  3 96  0
 0  0      0 3135228    332 620424    0    0     8  2712  522 1311  0  4 96  0
 0  0      0 3131740    332 621524    0    0    75  2935  559 2457  2  5 93  0
 0  0      0 3128348    332 622972    0    0    85  1470  490 2607  3  4 93  0
 0  0      0 3129292    332 622972    0    0     0   527  353 1398  0  1 99  0
-> second longer stall
 0  0      0 3128520    332 623028    0    0     6   488  249 1390  0  1 99  0
 0  0      0 3128236    332 623028    0    0     0   482  222 1222  0  1 99  0
 0  0      0 3128408    332 623028    0    0     0   585  269 1301  0  0 99  0
 0  0      0 3128532    332 623028    0    0     0   610  262 1278  0  0 99  0
 0  0      0 3128568    332 623028    0    0     0   636  345 1639  0  1 99  0
 0  0      0 3129032    332 623040    0    0     1   664  337 1466  0  1 99  0
 0  0      0 3129484    332 623040    0    0     0   658  300 1508  0  0 100  0
 0  0      0 3129576    332 623040    0    0     0   562  271 1454  0  1 99  0
 0  0      0 3129736    332 623040    0    0     0   627  278 1406  0  1 99  0
 0  0      0 3129368    332 623040    0    0     0   507  274 1301  0  1 99  0
 0  0      0 3129004    332 623040    0    0     0   444  211 1213  0  0 99  0
 0  1      0 3127260    332 623040    0    0     0  1036  305 1242  0  1 95  4
 0  0      0 3126280    332 623128    0    0     7  4241  555 1575  1  5 84 10
 0  0      0 3124948    332 623232    0    0     6  4194  529 1505  1  4 95  0
 0  0      0 3125228    332 624168    0    0    58  1966  586 1964  2  4 94  0
-> emerge resumed to normal speed, without any intervention from my side
 0  0      0 3120932    332 625904    0    0   112  1546  546 2565  3  4 93  0
 0  0      0 3118012    332 627568    0    0   128  1542  612 2705  3  4 93  0


From syslog:
first stall:
[  575.050000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 47259
global 610 0 0 wc __ tw 1023 sk 0
[  586.350000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 50465
global 6117 0 0 wc _M tw 967 sk 0
[  586.360000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 50408
global 6117 0 0 wc __ tw 1022 sk 0
[  599.900000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 53523
global 11141 0 0 wc __ tw 1009 sk 0
[  635.780000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 59397
global 12757 124 0 wc __ tw 0 sk 0
[  638.470000] mm/page-writeback.c 418 balance_dirty_pages:
emerge(6113) 1536 global 11405 51 0 wc __ tw 0 sk 0
[  638.820000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 58373
global 11276 48 0 wc __ tw -1 sk 0
[  641.260000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 57348
global 10565 100 0 wc __ tw 0 sk 0
[  643.980000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 56324
global 9788 103 0 wc __ tw -1 sk 0
[  646.120000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 55299
global 8912 6 0 wc __ tw 0 sk 0

second stall:
[  664.040000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 48117
global 2864 81 0 wc _M tw -13 sk 0
[  664.400000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 47080
global 1995 137 0 wc _M tw 176 sk 0
[  664.510000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 46232
global 1929 267 0 wc __ tw 880 sk 0
cron[6927]: (root) CMD (test -x /usr/sbin/run-crons && /usr/sbin/run-crons )
[  809.560000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 49422
global 19166 217 0 wc _M tw 380 sk 0
[  811.720000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 48778
global 17969 407 0 wc _M tw -4 sk 0
[  813.880000] mm/page-writeback.c 418 balance_dirty_pages:
emerge(6113) 1537 global 16592 233 0 wc _M tw -1 sk 0
[  814.710000] mm/page-writeback.c 418 balance_dirty_pages: find(6931)
1537 global 16132 179 0 wc __ tw -1 sk 0
[  814.720000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 47750
global 16040 271 0 wc _M tw -1 sk 0
[  815.040000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 46725
global 15403 779 0 wc CM tw 324 sk 0

The third stall happened after the emerge was finished. There still was
~120MB of dirty data, but its writeout got much slower over several
seconds.
vmstat 10:
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 1  0      0 3096152    332 630424    0    0    81  1503  640 2771  5  4 91  0
 0  0      0 3101024    332 631588    0    0   279   473  510 1281  5  2 92  1
-> stall / slowdown starts
 0  0      0 3147924    332 632384    0    0    78   626  449 1384  0  1 99  0
 1  0      0 3147940    332 632384    0    0     0   611  388 1387  0  1 99  0
 0  1      0 3147576    332 632384    0    0     0   939  449 1432  0  1 99  0
 0  0      0 3145476    332 632384    0    0     0  3592  644  925  0  4 93  3
-> writeout resumes full speed
 0  0      0 3147232    332 632480    0    0     0  3108  678 1053  0  3 97  0
 0  0      0 3146860    332 632480    0    0     0  2497  677  859  0  3 97  0
 0  0      0 3146720    332 632480    0    0     0  2433  648  839  0  3 97  0
 0  0      0 3147844    332 632484    0    0     0  2394  625  889  0  3 97  0
 0  0      0 3148128    332 632484    0    0     0  2204  671  848  0  2 97  0

from syslog:
[  848.070000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 48084
global 13805 0 0 wc _M tw 1008 sk 0
[  848.080000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 48068
global 13805 0 0 wc __ tw 1020 sk 0
[  884.090000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 61811
global 30297 2 0 wc __ tw 862 sk 0
[  921.760000] mm/page-writeback.c 418 balance_dirty_pages: cat(7170)
1541 global 28113 391 0 wc __ tw -5 sk 0
-> that cat was probably my `watch cat /proc/meminfo`
-> during the stall there were no updates visible there
[  922.190000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 76871
global 27735 0 0 wc __ tw -5 sk 0
[  923.550000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 75842
global 26688 106 0 wc _M tw -1 sk 0
[  924.940000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 74817
global 25698 195 0 wc _M tw 0 sk 0

Apart from my normal kde desktop (no compiz) and the emerge the system was idle.

If I see the complete stall again, I will post that too.

Torsten

^ permalink raw reply	[flat|nested] 61+ messages in thread

* writeout stalls in current -git
       [not found]                                 ` <393968464.13148@ustc.edu.cn>
@ 2007-11-02  1:54                                   ` Fengguang Wu
  2007-11-02  7:42                                   ` Torsten Kaiser
  1 sibling, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-11-02  1:54 UTC (permalink / raw)
  To: Torsten Kaiser
  Cc: Maxim Levitsky, Peter Zijlstra, linux-kernel, Andrew Morton,
	David Chinner, linux-fsdevel

On Thu, Nov 01, 2007 at 07:20:51PM +0100, Torsten Kaiser wrote:
> On 11/1/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> > On Wed, Oct 31, 2007 at 04:22:10PM +0100, Torsten Kaiser wrote:
> > > Since 2.6.23-mm1 I also experience strange hangs during heavy writeouts.
> > > Each time I noticed this I was using emerge (package util from the
> > > gentoo distribution) to install/upgrade a package. The last step,
> > > where this hang occurred, is moving the prepared files from a tmpfs
> > > partion to the main xfs filesystem.
> > > The hangs where not fatal, after a few second everything resumed
> > > normal, so I was not able to capture a good image of what was
> > > happening.
> >
> > Thank you for the detailed report.
> >
> > How severe was the hangs? Only writeouts stalled, all apps stalled, or
> > cannot type and run new commands?
> 
> Only writeout stalled. The emerge that was moving the files hung, but
> everything else worked normaly.
> I was able to run new commands, like coping the /proc/meminfo.

But you mentioned in the next mail that `watch cat /proc/meminfo`
could also be blocked for some time - I guess at the same time emerge
was stalled?

> [snip]
> > > After this SysRq+W writeback resumed again. Possible that writing
> > > above into the syslog triggered that.
> >
> > Maybe. Are the log files on another disk/partition?
> 
> No, everything was going to /
> 
> What might be interesting is, that doing cat /proc/meminfo
> >~/stall/meminfo did not resume the writeback. So there might some
> threshold that only was broken with the additional write from
> syslog-ng. Or syslog-ng does some flushing, I dont now. (I'm using the

Have you tried explicit `sync`? ;-)

> syslog-ng package from gentoo:
> http://www.balabit.com/products/syslog_ng/ , version 2.0.5)
> 
> > > The source tmpfs is mounted with any special parameters, but the
> > > target xfs filesystem resides on a dm-crypt device that is on top a 3
> > > disk RAID5 md.
> > > During the hang all CPUs where idle.
> >
> > No iowaits? ;-)
> 
> No, I have a KSysGuard in my taskbar that showed no activity at all.
> 
> OK, the subject does not match for my case, but there was also a tmpfs
> involved. And I found no thread with stalls on xfs. :-)

Do you mean it is actually related to tmpfs?

> > > The system is x86_64 with CONFIG_NO_HZ=y, but was still receiving ~330
> > > interrupts per second because of the bttv driver. (But I was not using
> > > that device at this time.)
> > >
> > > I'm willing to test patches or more provide more information, but lack
> > > a good testcase to trigger this on demand.
> >
> > Thank you. Maybe we can start by the applied debug patch :-)
> 
> Will applied it and try to recreate this.
> 
> Thanks for looking into it.

Thank you for the rich information, too :-)

Fengguang


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
       [not found]                                   ` <393970108.15915@ustc.edu.cn>
@ 2007-11-02  2:21                                     ` Fengguang Wu
  2007-11-02 10:15                                       ` Peter Zijlstra
  2007-11-02  7:50                                     ` writeout stalls in current -git Torsten Kaiser
  1 sibling, 1 reply; 61+ messages in thread
From: Fengguang Wu @ 2007-11-02  2:21 UTC (permalink / raw)
  To: Torsten Kaiser
  Cc: Maxim Levitsky, Peter Zijlstra, linux-kernel, Andrew Morton,
	David Chinner, linux-fsdevel

[-- Attachment #1: Type: text/plain, Size: 8466 bytes --]

On Thu, Nov 01, 2007 at 08:00:10PM +0100, Torsten Kaiser wrote:
> On 11/1/07, Torsten Kaiser <just.for.lkml@googlemail.com> wrote:
> > On 11/1/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> > > Thank you. Maybe we can start by the applied debug patch :-)
> >
> > Will applied it and try to recreate this.
> 
> Patch applied, used emerge to install a 2.6.24-rc1 kernel.
> 
> I had no complete stalls, but three times during the move from tmpfs
> to the main xfs the emerge got noticeable slower. There still was
> writeout happening, but as emerge prints out every file it has written
> during the pause not one file was processed.
> 
> vmstat 10:
> procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
>  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
>  0  1      0 3146424    332 614768    0    0   134  1849  438 2515  3  4 91  2
>  0  0      0 3146644    332 614784    0    0     2  1628  507  646  0  2 85 13
>  0  0      0 3146868    332 614868    0    0     5  2359  527 1076  0  3 97  0
>  1  0      0 3144372    332 616148    0    0    96  2829  607 2666  2  5 92  0
> -> normal writeout
>  0  0      0 3140560    332 618144    0    0   152  2764  633 3308  3  6 91  0
>  0  0      0 3137332    332 619908    0    0   114  1801  588 2858  3  4 93  0
>  0  0      0 3136912    332 620136    0    0    20   827  393 1605  1  2 98  0
> -> first stall

'stall': vmstat's output stalls for some time, or emerge stalls for
the next several vmstat lines?

>  0  0      0 3137088    332 620136    0    0     0   557  339 1437  0  1 99  0
>  0  0      0 3137160    332 620136    0    0     0   642  310 1400  0  1 99  0
>  0  0      0 3136588    332 620172    0    0     6  2972  527 1195  0  3 80 16
>  0  0      0 3136276    332 620348    0    0    10  2668  558 1195  0  3 96  0
>  0  0      0 3135228    332 620424    0    0     8  2712  522 1311  0  4 96  0
>  0  0      0 3131740    332 621524    0    0    75  2935  559 2457  2  5 93  0
>  0  0      0 3128348    332 622972    0    0    85  1470  490 2607  3  4 93  0
>  0  0      0 3129292    332 622972    0    0     0   527  353 1398  0  1 99  0
> -> second longer stall
>  0  0      0 3128520    332 623028    0    0     6   488  249 1390  0  1 99  0
>  0  0      0 3128236    332 623028    0    0     0   482  222 1222  0  1 99  0
>  0  0      0 3128408    332 623028    0    0     0   585  269 1301  0  0 99  0
>  0  0      0 3128532    332 623028    0    0     0   610  262 1278  0  0 99  0
>  0  0      0 3128568    332 623028    0    0     0   636  345 1639  0  1 99  0
>  0  0      0 3129032    332 623040    0    0     1   664  337 1466  0  1 99  0
>  0  0      0 3129484    332 623040    0    0     0   658  300 1508  0  0 100  0
>  0  0      0 3129576    332 623040    0    0     0   562  271 1454  0  1 99  0
>  0  0      0 3129736    332 623040    0    0     0   627  278 1406  0  1 99  0
>  0  0      0 3129368    332 623040    0    0     0   507  274 1301  0  1 99  0
>  0  0      0 3129004    332 623040    0    0     0   444  211 1213  0  0 99  0
>  0  1      0 3127260    332 623040    0    0     0  1036  305 1242  0  1 95  4
>  0  0      0 3126280    332 623128    0    0     7  4241  555 1575  1  5 84 10
>  0  0      0 3124948    332 623232    0    0     6  4194  529 1505  1  4 95  0
>  0  0      0 3125228    332 624168    0    0    58  1966  586 1964  2  4 94  0
> -> emerge resumed to normal speed, without any intervention from my side
>  0  0      0 3120932    332 625904    0    0   112  1546  546 2565  3  4 93  0
>  0  0      0 3118012    332 627568    0    0   128  1542  612 2705  3  4 93  0

Interesting, the 'bo' never falls to zero.

> 
> >From syslog:
> first stall:
> [  575.050000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 47259 > global 610 0 0 wc __ tw 1023 sk 0
> [  586.350000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 50465 > global 6117 0 0 wc _M tw 967 sk 0
> [  586.360000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 50408 > global 6117 0 0 wc __ tw 1022 sk 0
> [  599.900000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 53523 > global 11141 0 0 wc __ tw 1009 sk 0
> [  635.780000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 59397 > global 12757 124 0 wc __ tw 0 sk 0
> [  638.470000] mm/page-writeback.c 418 balance_dirty_pages: > emerge(6113) 1536 global 11405 51 0 wc __ tw 0 sk 0
> [  638.820000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 58373 > global 11276 48 0 wc __ tw -1 sk 0
> [  641.260000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 57348 > global 10565 100 0 wc __ tw 0 sk 0
> [  643.980000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 56324 > global 9788 103 0 wc __ tw -1 sk 0
> [  646.120000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 55299 > global 8912 6 0 wc __ tw 0 sk 0
> 
> second stall:
> [  664.040000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 48117 > global 2864 81 0 wc _M tw -13 sk 0
> [  664.400000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 47080 > global 1995 137 0 wc _M tw 176 sk 0
> [  664.510000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 46232 > global 1929 267 0 wc __ tw 880 sk 0
> cron[6927]: (root) CMD (test -x /usr/sbin/run-crons && /usr/sbin/run-crons )
> [  809.560000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 49422 > global 19166 217 0 wc _M tw 380 sk 0
> [  811.720000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 48778 > global 17969 407 0 wc _M tw -4 sk 0
> [  813.880000] mm/page-writeback.c 418 balance_dirty_pages: > emerge(6113) 1537 global 16592 233 0 wc _M tw -1 sk 0
> [  814.710000] mm/page-writeback.c 418 balance_dirty_pages: find(6931) > 1537 global 16132 179 0 wc __ tw -1 sk 0
> [  814.720000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 47750 > global 16040 271 0 wc _M tw -1 sk 0
> [  815.040000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 46725 > global 15403 779 0 wc CM tw 324 sk 0
>
> the third stall happend after the emerge was finished. There still was
> ~120Mb of dirty data, but its writeout got much slower over several
> seconds.
> vmstat 10:
> procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
>  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
>  1  0      0 3096152    332 630424    0    0    81  1503  640 2771  5  4 91  0
>  0  0      0 3101024    332 631588    0    0   279   473  510 1281  5  2 92  1
> -> stall / slowdown starts
>  0  0      0 3147924    332 632384    0    0    78   626  449 1384  0  1 99  0
>  1  0      0 3147940    332 632384    0    0     0   611  388 1387  0  1 99  0
>  0  1      0 3147576    332 632384    0    0     0   939  449 1432  0  1 99  0
>  0  0      0 3145476    332 632384    0    0     0  3592  644  925  0  4 93  3
> -> writeout resumes full speed
>  0  0      0 3147232    332 632480    0    0     0  3108  678 1053  0  3 97  0
>  0  0      0 3146860    332 632480    0    0     0  2497  677  859  0  3 97  0
>  0  0      0 3146720    332 632480    0    0     0  2433  648  839  0  3 97  0
>  0  0      0 3147844    332 632484    0    0     0  2394  625  889  0  3 97  0
>  0  0      0 3148128    332 632484    0    0     0  2204  671  848  0  2 97  0
> 
> from syslog:
> [  848.070000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 48084 > global 13805 0 0 wc _M tw 1008 sk 0
> [  848.080000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 48068 > global 13805 0 0 wc __ tw 1020 sk 0
> [  884.090000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 61811 > global 30297 2 0 wc __ tw 862 sk 0
> [  921.760000] mm/page-writeback.c 418 balance_dirty_pages: cat(7170) > 1541 global 28113 391 0 wc __ tw -5 sk 0
> -> that cat was probably my watch cat /proc/meminfo
> -> during the stall there where no updates visible there
> [  922.190000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 76871 > global 27735 0 0 wc __ tw -5 sk 0
> [  923.550000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 75842 > global 26688 106 0 wc _M tw -1 sk 0
> [  924.940000] mm/page-writeback.c 655 wb_kupdate: pdflush(285) 74817 > global 25698 195 0 wc _M tw 0 sk 0
> 
> Apart from my normal kde desktop (no compiz) and the emerge the system was idle.

Interestingly, no background_writeout() appears, but only
balance_dirty_pages() and wb_kupdate.  Obviously wb_kupdate won't
block the process.

> If I see the complete stall again, I will post that too.
 
Thank you, could you run it with the attached new debug patch?

Fengguang

[-- Attachment #2: writeback-debug.patch --]
[-- Type: text/x-diff, Size: 2677 bytes --]

---
 mm/page-writeback.c |   29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

--- linux-2.6.24-git17.orig/mm/page-writeback.c
+++ linux-2.6.24-git17/mm/page-writeback.c
@@ -98,6 +98,26 @@ EXPORT_SYMBOL(laptop_mode);
 
 /* End of sysctl-exported parameters */
 
+#define writeback_debug_report(n, wbc) do {                               \
+	__writeback_debug_report(n, wbc, __FILE__, __LINE__, __FUNCTION__); \
+} while (0)
+
+void __writeback_debug_report(long n, struct writeback_control *wbc,
+		const char *file, int line, const char *func)
+{
+	printk(KERN_DEBUG "%s %d %s: %s(%d) %ld "
+			"global %lu %lu %lu "
+			"wc %c%c tw %ld sk %ld\n",
+			file, line, func,
+			current->comm, current->pid, n,
+			global_page_state(NR_FILE_DIRTY),
+			global_page_state(NR_WRITEBACK),
+			global_page_state(NR_UNSTABLE_NFS),
+			wbc->encountered_congestion ? 'C':'_',
+			wbc->more_io ? 'M':'_',
+			wbc->nr_to_write,
+			wbc->pages_skipped);
+}
 
 static void background_writeout(unsigned long _min_pages);
 
@@ -395,6 +415,7 @@ static void balance_dirty_pages(struct a
 			pages_written += write_chunk - wbc.nr_to_write;
 			get_dirty_limits(&background_thresh, &dirty_thresh,
 				       &bdi_thresh, bdi);
+			writeback_debug_report(pages_written, &wbc);
 		}
 
 		/*
@@ -421,6 +442,7 @@ static void balance_dirty_pages(struct a
 			break;		/* We've done our duty */
 
 		congestion_wait(WRITE, HZ/10);
+		writeback_debug_report(-pages_written, &wbc);
 	}
 
 	if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
@@ -515,6 +537,11 @@ void throttle_vm_writeout(gfp_t gfp_mask
 			global_page_state(NR_WRITEBACK) <= dirty_thresh)
                         	break;
                 congestion_wait(WRITE, HZ/10);
+		printk(KERN_DEBUG "throttle_vm_writeout: "
+				"congestion_wait on %lu+%lu > %lu\n",
+				global_page_state(NR_UNSTABLE_NFS),
+				global_page_state(NR_WRITEBACK),
+				dirty_thresh);
 
 		/*
 		 * The caller might hold locks which can prevent IO completion
@@ -557,6 +584,7 @@ static void background_writeout(unsigned
 		wbc.pages_skipped = 0;
 		writeback_inodes(&wbc);
 		min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+		writeback_debug_report(min_pages, &wbc);
 		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
 			/* Wrote less than expected */
 			if (wbc.encountered_congestion || wbc.more_io)
@@ -630,6 +658,7 @@ static void wb_kupdate(unsigned long arg
 		wbc.encountered_congestion = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		writeback_inodes(&wbc);
+		writeback_debug_report(nr_to_write, &wbc);
 		if (wbc.nr_to_write > 0) {
 			if (wbc.encountered_congestion || wbc.more_io)
 				congestion_wait(WRITE, HZ/10);

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
       [not found]                                 ` <393968464.13148@ustc.edu.cn>
  2007-11-02  1:54                                   ` Fengguang Wu
@ 2007-11-02  7:42                                   ` Torsten Kaiser
       [not found]                                     ` <393989953.22199@ustc.edu.cn>
  1 sibling, 1 reply; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-02  7:42 UTC (permalink / raw)
  To: Fengguang Wu
  Cc: Maxim Levitsky, Peter Zijlstra, linux-kernel, Andrew Morton,
	David Chinner, linux-fsdevel

The Subject is still misleading, I'm using 2.6.23-mm1.

On 11/2/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> On Thu, Nov 01, 2007 at 07:20:51PM +0100, Torsten Kaiser wrote:
> > On 11/1/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> > > On Wed, Oct 31, 2007 at 04:22:10PM +0100, Torsten Kaiser wrote:
> > > > Since 2.6.23-mm1 I also experience strange hangs during heavy writeouts.
> > > > Each time I noticed this I was using emerge (package util from the
> > > > gentoo distribution) to install/upgrade a package. The last step,
> > > > where this hang occurred, is moving the prepared files from a tmpfs
> > > > partion to the main xfs filesystem.
> > > > The hangs where not fatal, after a few second everything resumed
> > > > normal, so I was not able to capture a good image of what was
> > > > happening.
> > >
> > > Thank you for the detailed report.
> > >
> > > How severe was the hangs? Only writeouts stalled, all apps stalled, or
> > > cannot type and run new commands?
> >
> > Only writeout stalled. The emerge that was moving the files hung, but
> > everything else worked normaly.
> > I was able to run new commands, like coping the /proc/meminfo.
>
> But you mentioned in the next mail that `watch cat /proc/meminfo`
> could also be blocked for some time - I guess in the same time emerge
> was stalled?

The behavior was different on these stalls.
In the first report the writeout stopped completely and the emerge stopped,
but at that time a cat /proc/meminfo >~/stall/meminfo did succeed and
did not stall.
About the watch cat /proc/meminfo, I will write in the answer to the
other mail...

> > [snip]
> > > > After this SysRq+W writeback resumed again. Possible that writing
> > > > above into the syslog triggered that.
> > >
> > > Maybe. Are the log files on another disk/partition?
> >
> > No, everything was going to /
> >
> > What might be interesting is, that doing cat /proc/meminfo
> > >~/stall/meminfo did not resume the writeback. So there might some
> > threshold that only was broken with the additional write from
> > syslog-ng. Or syslog-ng does some flushing, I dont now. (I'm using the
>
> Have you tried explicit `sync`? ;-)

No. I wanted to see what is stalled. So I startet by collecting info
from /proc and then the SysRq+W. And after hitting SysRQ the writeout
started to resume without any further action.

But I think I have seen a `sync` stall also. During an other emerge I
noticed the system slowing down and wanted to use `sync` to speed up
the writeout. The result was, that the writeout did not speed up
immediately, only after around a minute. The `sync` only returned at
that time.
Can writers starve `sync`?

> > syslog-ng package from gentoo:
> > http://www.balabit.com/products/syslog_ng/ , version 2.0.5)
> >
> > > > The source tmpfs is mounted with any special parameters, but the
> > > > target xfs filesystem resides on a dm-crypt device that is on top a 3
> > > > disk RAID5 md.
> > > > During the hang all CPUs where idle.
> > >
> > > No iowaits? ;-)
> >
> > No, I have a KSysGuard in my taskbar that showed no activity at all.
> >
> > OK, the subject does not match for my case, but there was also a tmpfs
> > involved. And I found no thread with stalls on xfs. :-)
>
> Do you mean it is actually related with tmpfs?

I don't know. It's just that I have seen tmpfs also redirtying inodes
in these logs and the stalling emerge is moving files from tmpfs to
xfs.
It could be, but I don't know enough about tmpfs internals to really be sure.
I just wanted to mention, that tmpfs is involved somehow.

Torsten

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
       [not found]                                   ` <393970108.15915@ustc.edu.cn>
  2007-11-02  2:21                                     ` writeout stalls " Fengguang Wu
@ 2007-11-02  7:50                                     ` Torsten Kaiser
  1 sibling, 0 replies; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-02  7:50 UTC (permalink / raw)
  To: Fengguang Wu
  Cc: Maxim Levitsky, Peter Zijlstra, linux-kernel, Andrew Morton,
	David Chinner, linux-fsdevel

On 11/2/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> On Thu, Nov 01, 2007 at 08:00:10PM +0100, Torsten Kaiser wrote:
> > On 11/1/07, Torsten Kaiser <just.for.lkml@googlemail.com> wrote:
> > > On 11/1/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> > > > Thank you. Maybe we can start by the applied debug patch :-)
> > >
> > > Will applied it and try to recreate this.
> >
> > Patch applied, used emerge to install a 2.6.24-rc1 kernel.
> >
> > I had no complete stalls, but three times during the move from tmpfs
> > to the main xfs the emerge got noticeable slower. There still was
> > writeout happening, but as emerge prints out every file it has written
> > during the pause not one file was processed.
> >
> > vmstat 10:
> > procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
> >  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
> >  0  1      0 3146424    332 614768    0    0   134  1849  438 2515  3  4 91  2
> >  0  0      0 3146644    332 614784    0    0     2  1628  507  646  0  2 85 13
> >  0  0      0 3146868    332 614868    0    0     5  2359  527 1076  0  3 97  0
> >  1  0      0 3144372    332 616148    0    0    96  2829  607 2666  2  5 92  0
> > -> normal writeout
> >  0  0      0 3140560    332 618144    0    0   152  2764  633 3308  3  6 91  0
> >  0  0      0 3137332    332 619908    0    0   114  1801  588 2858  3  4 93  0
> >  0  0      0 3136912    332 620136    0    0    20   827  393 1605  1  2 98  0
> > -> first stall
>
> 'stall': vmstat's output stalls for some time, or emerge stalls for
> the next several vmstat lines?

emerge stalls. The vmstat did work normally.

> >  0  0      0 3137088    332 620136    0    0     0   557  339 1437  0  1 99  0
> >  0  0      0 3137160    332 620136    0    0     0   642  310 1400  0  1 99  0

So meaning that these last three lines indicated that for ~30 seconds
the writeout was much slower than normal.

> >  0  0      0 3136588    332 620172    0    0     6  2972  527 1195  0  3 80 16
> >  0  0      0 3136276    332 620348    0    0    10  2668  558 1195  0  3 96  0
> >  0  0      0 3135228    332 620424    0    0     8  2712  522 1311  0  4 96  0
> >  0  0      0 3131740    332 621524    0    0    75  2935  559 2457  2  5 93  0
> >  0  0      0 3128348    332 622972    0    0    85  1470  490 2607  3  4 93  0
> >  0  0      0 3129292    332 622972    0    0     0   527  353 1398  0  1 99  0
> > -> second longer stall
> >  0  0      0 3128520    332 623028    0    0     6   488  249 1390  0  1 99  0
> >  0  0      0 3128236    332 623028    0    0     0   482  222 1222  0  1 99  0
> >  0  0      0 3128408    332 623028    0    0     0   585  269 1301  0  0 99  0
> >  0  0      0 3128532    332 623028    0    0     0   610  262 1278  0  0 99  0
> >  0  0      0 3128568    332 623028    0    0     0   636  345 1639  0  1 99  0
> >  0  0      0 3129032    332 623040    0    0     1   664  337 1466  0  1 99  0
> >  0  0      0 3129484    332 623040    0    0     0   658  300 1508  0  0 100  0
> >  0  0      0 3129576    332 623040    0    0     0   562  271 1454  0  1 99  0
> >  0  0      0 3129736    332 623040    0    0     0   627  278 1406  0  1 99  0
> >  0  0      0 3129368    332 623040    0    0     0   507  274 1301  0  1 99  0
> >  0  0      0 3129004    332 623040    0    0     0   444  211 1213  0  0 99  0

The second time the slowdown was much longer.

> >  0  1      0 3127260    332 623040    0    0     0  1036  305 1242  0  1 95  4
> >  0  0      0 3126280    332 623128    0    0     7  4241  555 1575  1  5 84 10
> >  0  0      0 3124948    332 623232    0    0     6  4194  529 1505  1  4 95  0
> >  0  0      0 3125228    332 624168    0    0    58  1966  586 1964  2  4 94  0
> > -> emerge resumed to normal speed, without any intervention from my side
> >  0  0      0 3120932    332 625904    0    0   112  1546  546 2565  3  4 93  0
> >  0  0      0 3118012    332 627568    0    0   128  1542  612 2705  3  4 93  0
>
> Interesting, the 'bo' never falls to zero.

Yes, I was not able to recreate the complete stall from the first
mail, but even this slowdown does not look completely healthy.
I "hope" this is the same bug, as I seem to be able to trigger this
slowdown much easier.

[snip logs]
>
> Interestingly, no background_writeout() appears, but only
> balance_dirty_pages() and wb_kupdate.  Obviously wb_kupdate won't
> block the process.

Yes, I noticed that too.
The only time I have seen background_writeout was during bootup and shutdown.

As for the stalled watch cat /proc/meminfo:
That happened on the third slowdown/stall when emerge was already finished

> > If I see the complete stall again, I will post that too.
>
> Thank you, could you run it with the attached new debug patch?

I will, but it will have to wait until the evening.

Torsten

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
       [not found]                                     ` <393989953.22199@ustc.edu.cn>
@ 2007-11-02  7:52                                       ` Fengguang Wu
  2007-11-02 17:47                                       ` Torsten Kaiser
  1 sibling, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-11-02  7:52 UTC (permalink / raw)
  To: Torsten Kaiser
  Cc: Maxim Levitsky, Peter Zijlstra, linux-kernel, Andrew Morton,
	David Chinner, linux-fsdevel

On Fri, Nov 02, 2007 at 08:42:05AM +0100, Torsten Kaiser wrote:
> The Subject is still missleading, I'm using 2.6.23-mm1.
> 
> On 11/2/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> > On Thu, Nov 01, 2007 at 07:20:51PM +0100, Torsten Kaiser wrote:
> > > On 11/1/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> > > > On Wed, Oct 31, 2007 at 04:22:10PM +0100, Torsten Kaiser wrote:
> > > > > Since 2.6.23-mm1 I also experience strange hangs during heavy writeouts.
> > > > > Each time I noticed this I was using emerge (package util from the
> > > > > gentoo distribution) to install/upgrade a package. The last step,
> > > > > where this hang occurred, is moving the prepared files from a tmpfs
> > > > > partion to the main xfs filesystem.
> > > > > The hangs where not fatal, after a few second everything resumed
> > > > > normal, so I was not able to capture a good image of what was
> > > > > happening.
> > > >
> > > > Thank you for the detailed report.
> > > >
> > > > How severe was the hangs? Only writeouts stalled, all apps stalled, or
> > > > cannot type and run new commands?
> > >
> > > Only writeout stalled. The emerge that was moving the files hung, but
> > > everything else worked normaly.
> > > I was able to run new commands, like coping the /proc/meminfo.
> >
> > But you mentioned in the next mail that `watch cat /proc/meminfo`
> > could also be blocked for some time - I guess in the same time emerge
> > was stalled?
> 
> The behavior was different on these stalls.
> On first report the writeout stopped completly, the emerge stopped,
> but at that time a cat /proc/meminfo >~/stall/meminfo did succedd and
> not stall.
> About the watch cat /proc/meminfo, I will write in the answer to the
> other mail...

OK.

> > > [snip]
> > > > > After this SysRq+W writeback resumed again. Possible that writing
> > > > > above into the syslog triggered that.
> > > >
> > > > Maybe. Are the log files on another disk/partition?
> > >
> > > No, everything was going to /
> > >
> > > What might be interesting is, that doing cat /proc/meminfo
> > > >~/stall/meminfo did not resume the writeback. So there might some
> > > threshold that only was broken with the additional write from
> > > syslog-ng. Or syslog-ng does some flushing, I dont now. (I'm using the
> >
> > Have you tried explicit `sync`? ;-)
> 
> No. I wanted to see what is stalled. So I startet by collecting info
> from /proc and then the SysRq+W. And after hitting SysRQ the writeout
> started to resume without any further action.
> 
> But I think I have seen a `sync` stall also. During an other emerge I
> noticed the system slowing down and wanted to use `sync` to speed up
> the writeout. The result was, that the writeout did not speed up
> imiedetly only after around a minitue. The `sync` only returned at
> that time.
> Can writers starve `sync`?

I guess the new debug printks will provide more hints on it.

> > > syslog-ng package from gentoo:
> > > http://www.balabit.com/products/syslog_ng/ , version 2.0.5)
> > >
> > > > > The source tmpfs is mounted with any special parameters, but the
> > > > > target xfs filesystem resides on a dm-crypt device that is on top a 3
> > > > > disk RAID5 md.
> > > > > During the hang all CPUs where idle.
> > > >
> > > > No iowaits? ;-)
> > >
> > > No, I have a KSysGuard in my taskbar that showed no activity at all.
> > >
> > > OK, the subject does not match for my case, but there was also a tmpfs
> > > involved. And I found no thread with stalls on xfs. :-)
> >
> > Do you mean it is actually related with tmpfs?
> 
> I don't know. It's just that I have seen tmpfs also redirtieing inodes
> in these logs and the stalling emerge is moving files from tmpfs to
> xfs.
> It could be, but I don't know enough about tmpfs internals to really be sure.
> I just wanted to mention, that tmpfs is involved somehow.

The requeue messages for tmpfs are not pleasant, but known to be fine ;-) 

Fengguang


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-02  2:21                                     ` writeout stalls " Fengguang Wu
@ 2007-11-02 10:15                                       ` Peter Zijlstra
  2007-11-02 19:22                                         ` Torsten Kaiser
       [not found]                                         ` <393999615.15343@ustc.edu.cn>
  0 siblings, 2 replies; 61+ messages in thread
From: Peter Zijlstra @ 2007-11-02 10:15 UTC (permalink / raw)
  To: Fengguang Wu
  Cc: Torsten Kaiser, Maxim Levitsky, linux-kernel, Andrew Morton,
	David Chinner, linux-fsdevel

On Fri, 2007-11-02 at 10:21 +0800, Fengguang Wu wrote:

> Interestingly, no background_writeout() appears, but only
> balance_dirty_pages() and wb_kupdate.  Obviously wb_kupdate won't
> block the process.

Yeah, the background threshold is not (yet) scaled. So it can happen
that the bdi_dirty limit is below the background limit.

I'm curious as to these stalls, though; I can't seem to think of
what goes wrong.. esp since most writeback seems to happen from pdflush.

(or I'm totally misreading it - quite possible as I'm still recovering
from a serious cold and not all the green stuff has yet figured out its
proper place wrt brain cells 'n stuff)


I still have this patch floating around:

---
Subject: mm: speed up writeback ramp-up on clean systems

We allow violation of bdi limits if there is a lot of room on the
system. Once we hit half the total limit we start enforcing bdi limits
and bdi ramp-up should happen. Doing it this way avoids many small
writeouts on an otherwise idle system and should also speed up the
ramp-up.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 mm/page-writeback.c |   19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

Index: linux-2.6/mm/page-writeback.c
===================================================================
--- linux-2.6.orig/mm/page-writeback.c	2007-09-28 10:08:33.937415368 +0200
+++ linux-2.6/mm/page-writeback.c	2007-09-28 10:54:26.018247516 +0200
@@ -355,8 +355,8 @@ get_dirty_limits(long *pbackground, long
  */
 static void balance_dirty_pages(struct address_space *mapping)
 {
-	long bdi_nr_reclaimable;
-	long bdi_nr_writeback;
+	long nr_reclaimable, bdi_nr_reclaimable;
+	long nr_writeback, bdi_nr_writeback;
 	long background_thresh;
 	long dirty_thresh;
 	long bdi_thresh;
@@ -376,11 +376,26 @@ static void balance_dirty_pages(struct a
 
 		get_dirty_limits(&background_thresh, &dirty_thresh,
 				&bdi_thresh, bdi);
+
+		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
+					global_page_state(NR_UNSTABLE_NFS);
+		nr_writeback = global_page_state(NR_WRITEBACK);
+
 		bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
 		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
+
 		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
 			break;
 
+		/*
+		 * Throttle it only when the background writeback cannot
+		 * catch-up. This avoids (excessively) small writeouts
+		 * when the bdi limits are ramping up.
+		 */
+		if (nr_reclaimable + nr_writeback <
+				(background_thresh + dirty_thresh) / 2)
+			break;
+
 		if (!bdi->dirty_exceeded)
 			bdi->dirty_exceeded = 1;
 



^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
       [not found]                                         ` <393999615.15343@ustc.edu.cn>
@ 2007-11-02 10:33                                           ` Fengguang Wu
  2007-11-05 23:57                                           ` Andrew Morton
  1 sibling, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-11-02 10:33 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Torsten Kaiser, Maxim Levitsky, linux-kernel, Andrew Morton,
	David Chinner, linux-fsdevel

On Fri, Nov 02, 2007 at 11:15:32AM +0100, Peter Zijlstra wrote:
> On Fri, 2007-11-02 at 10:21 +0800, Fengguang Wu wrote:
> 
> > Interestingly, no background_writeout() appears, but only
> > balance_dirty_pages() and wb_kupdate.  Obviously wb_kupdate won't
> > block the process.
> 
> Yeah, the background threshold is not (yet) scaled. So it can happen
> that the bdi_dirty limit is below the background limit.
> 
> I'm curious though as to these stalls, though, I can't seem to think of
> what goes wrong.. esp since most writeback seems to happen from pdflush.

Me confused too. The new debug patch will confirm whether emerge is
waiting in balance_dirty_pages().

> (or I'm totally misreading it - quite a possible as I'm still recovering
> from a serious cold and not all the green stuff has yet figured out its
> proper place wrt brain cells 'n stuff)

Do take care of yourself.

> 
> I still have this patch floating around:

I think this patch is OK for 2.6.24 :-)

Reviewed-by: Fengguang Wu <wfg@mail.ustc.edu.cn> 

> 
> ---
> Subject: mm: speed up writeback ramp-up on clean systems
> 
> We allow violation of bdi limits if there is a lot of room on the
> system. Once we hit half the total limit we start enforcing bdi limits
> and bdi ramp-up should happen. Doing it this way avoids many small
> writeouts on an otherwise idle system and should also speed up the
> ramp-up.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
>
> ---
>  mm/page-writeback.c |   19 +++++++++++++++++--
>  1 file changed, 17 insertions(+), 2 deletions(-)
> 
> Index: linux-2.6/mm/page-writeback.c
> ===================================================================
> --- linux-2.6.orig/mm/page-writeback.c	2007-09-28 10:08:33.937415368 +0200
> +++ linux-2.6/mm/page-writeback.c	2007-09-28 10:54:26.018247516 +0200
> @@ -355,8 +355,8 @@ get_dirty_limits(long *pbackground, long
>   */
>  static void balance_dirty_pages(struct address_space *mapping)
>  {
> -	long bdi_nr_reclaimable;
> -	long bdi_nr_writeback;
> +	long nr_reclaimable, bdi_nr_reclaimable;
> +	long nr_writeback, bdi_nr_writeback;
>  	long background_thresh;
>  	long dirty_thresh;
>  	long bdi_thresh;
> @@ -376,11 +376,26 @@ static void balance_dirty_pages(struct a
>  
>  		get_dirty_limits(&background_thresh, &dirty_thresh,
>  				&bdi_thresh, bdi);
> +
> +		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
> +					global_page_state(NR_UNSTABLE_NFS);
> +		nr_writeback = global_page_state(NR_WRITEBACK);
> +
>  		bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
>  		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
> +
>  		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
>  			break;
>  
> +		/*
> +		 * Throttle it only when the background writeback cannot
> +		 * catch-up. This avoids (excessively) small writeouts
> +		 * when the bdi limits are ramping up.
> +		 */
> +		if (nr_reclaimable + nr_writeback <
> +				(background_thresh + dirty_thresh) / 2)
> +			break;
> +
>  		if (!bdi->dirty_exceeded)
>  			bdi->dirty_exceeded = 1;
>  
> 
> 


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
       [not found]                                     ` <393989953.22199@ustc.edu.cn>
  2007-11-02  7:52                                       ` Fengguang Wu
@ 2007-11-02 17:47                                       ` Torsten Kaiser
  1 sibling, 0 replies; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-02 17:47 UTC (permalink / raw)
  To: Fengguang Wu
  Cc: Maxim Levitsky, Peter Zijlstra, linux-kernel, Andrew Morton,
	David Chinner, linux-fsdevel

On 11/2/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> I guess the new debug printks will provide more hints on it.

The "throttle_vm_writeout" did not trigger for my new workload.
Except one (the first) "balance_dirty_pages" came from line 445, the
newly added.

But I found an other workload that looks much more ... hmm ... 'mad'.

If I do an unmerge the emerge program will read all files to
revalidate their checksum and then delete it. If I do this unmerge the
progress of emerge will stall periodically for ~47 seconds. (Two times
I used a stopwatch to get this value. I think all other stalls were
identical, at least in KSysGuard they looked evenly spaced)

What really counts as 'mad' is this output from vmstat 10:
0  0      0 3639044    332 177420    0    0   292    20  101  618  1  1 98  0
 1  0      0 3624068    332 180628    0    0   323    22  137  663  5  2 93  0
 0  0      0 3602456    332 183972    0    0   301    23  159  641  9  3 87  2
-> this was emerge collecting its package database
 0  0      0 3600052    332 184264    0    0    19  7743  823 5543  3  8 89  0
 0  0      0 3599332    332 184280    0    0     1  2532  517 2341  1  2 97  0
-> normal removing, now the emerge stalls
 0  0      0 3599404    332 184280    0    0     0   551  323 1290  0  0 99  0
 0  0      0 3599648    332 184280    0    0     0   644  314 1222  0  1 99  0
 0  0      0 3599648    332 184284    0    0     0   569  296 1242  0  0 99  0
 0  0      0 3599868    332 184288    0    0     0  2362  320 2735  1  2 97  0
-> resumes for a short time, then stalls again
 0  0      0 3599488    332 184288    0    0     0   584  292 1395  0  0 99  0
 0  0      0 3600216    332 184288    0    0     0   550  301 1361  0  0 99  0
 0  0      0 3594176    332 184296    0    0     0   562  300 1373  2  1 97  0
 0  0      0 3594648    332 184296    0    0     0  1278  336 1881  1  1 98  0
 0  0      0 3594172    332 184308    0    0     1  2812  421 2840  1  4 95  0
-> and again
 0  0      0 3594296    332 184308    0    0     0   545  342 1283  0  0 99  0
 0  0      0 3594376    332 184308    0    0     0   561  319 1314  0  1 99  0
 0  0      0 3594340    332 184308    0    0     0   586  327 1258  0  1 99  0
 0  0      0 3594644    332 184308    0    0     0   498  248 1376  0  0 99  0
 0  0      0 3595116    332 184348    0    0     0  3519  565 3452  2  4 95  0
-> and again
 0  0      0 3595320    332 184348    0    0     0   483  284 1163  0  0 99  0
 3  0      0 3595444    332 184352    0    0     0   498  247 1173  3  0 97  0
 1  0      0 3585108    332 184600    0    0     0  1298  644 2394  1  1 98  0
 1  0      0 3588152    332 184608    0    0     0  3154  520 3221  2  4 94  0
-> and again
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 2  0      0 3588540    332 184608    0    0     0   574  268 1332  0  1 99  0
 1  0      0 3588744    332 184608    0    0     0   546  335 1289  0  0 99  0
 1  0      0 3588628    332 184608    0    0     0   638  348 1257  0  1 99  0
 1  0      0 3588952    332 184608    0    0     0   567  310 1226  0  1 99  0
 1  0      0 3603644    332 184972    0    0    59  2821  531 2419  3  4 91  1
 1  0      0 3649476    332 186272    0    0   370   395  380 1335  1  1 98  0
-> emerge finishes, and now the system goes 'mad'
The Dirty:-line from /proc/meminfo stays at 8 or 12 kB, but the
system is writing like 'mad':
 1  0      0 3650616    332 186276    0    0     0   424  296 1126  0  1 99  0
 1  0      0 3650708    332 186276    0    0     0   418  249 1190  0  0 99  0
 1  0      0 3650716    332 186276    0    0     0   418  256 1151  0  1 99  0
 1  0      0 3650816    332 186276    0    0     0   420  257 1120  0  0 99  0
 1  0      0 3651132    332 186276    0    0     0   418  269 1145  0  0 99  0
 1  0      0 3651332    332 186280    0    0     0   419  294 1099  0  1 99  0
 1  0      0 3651732    332 186280    0    0     0   423  311 1072  0  1 99  0
 1  0      0 3652048    332 186280    0    0     0   400  317 1127  0  0 99  0
 1  0      0 3652024    332 186280    0    0     0   426  346 1066  0  1 99  0
 2  0      0 3652304    332 186280    0    0     0   425  357 1132  0  1 99  0
 2  0      0 3652652    332 186280    0    0     0   416  364 1184  0  0 99  0
 1  0      0 3652836    332 186280    0    0     0   413  397 1110  0  1 99  0
 1  0      0 3652852    332 186284    0    0     0   426  427 1290  0  1 99  0
 1  0      0 3652060    332 186420    0    0    14   404  421 1768  1  1 97  0
 1  0      0 3652904    332 186420    0    0     0   418  437 1792  1  1 98  0
 1  0      0 3653572    332 186420    0    0     0   410  442 1481  1  1 99  0
 2  0      0 3653872    332 186420    0    0     0   410  451 1206  0  1 99  0
 3  0      0 3654572    332 186420    0    0     0   414  479 1341  0  1 99  0
 1  0      0 3651720    332 189832    0    0   341   420  540 1600  1  1 98  1
 1  0      0 3653256    332 189832    0    0     0   411  499 1538  1  1 98  0
 1  0      0 3654268    332 189832    0    0     0   428  505 1281  0  1 99  0
 1  0      0 3655328    332 189832    0    0     0   394  532 1015  0  1 99  0
 2  0      0 3655804    332 189832    0    0     0   355  546  964  0  1 99  0
 1  0      0 3656804    332 189836    0    0     0   337  527  949  0  1 99  0
 1  0      0 3658020    332 189836    0    0     0   348  522  937  0  1 99  0
 1  0      0 3659992    332 189836    0    0     0   354  503 1078  0  1 99  0
 1  0      0 3660068    332 189836    0    0     0    69  341  356  0  0 99  0
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 3  0      0 3660208    332 189836    0    0     0    18  311  236  0  0 99  0
 2  0      0 3660028    332 189836    0    0     0     1  297  210  0  0 100  0
... until it stops.
I tried this a second time, the same happened again.
Neither SysRq+S nor `sync` will stop this after-finish-writeout.
During the unmerges I had never seen more than 300 kB of dirty data,
but as watch only updated once every 2 seconds that is not really a
hard limit, but just what I was able to see.

There was nothing else accessing the disks, only kcryptd, md1_raid5,
pdflush and emerge showed up with minimal cpu time in top / atop.

Before/during emerge stall:
[  360.920000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 30759
global 2 0 0 wc __ tw 1023 sk 0
[  364.910000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 30759
global 2 0 0 wc __ tw 1023 sk 0
[  369.530000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 30759
global 2 0 0 wc __ tw 1024 sk 0
[  374.560000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 30386
global 3 0 0 wc __ tw 1024 sk 0
[  379.600000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 28684
global 3 0 0 wc __ tw 1024 sk 0
[  384.600000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 28684
global 3 0 0 wc __ tw 1024 sk 0
[  389.660000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 28684
global 3 0 0 wc __ tw 1024 sk 0
[  394.600000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 28684
global 3 0 0 wc _M tw 1023 sk 0
[  394.620000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 28683
global 3 0 0 wc __ tw 1023 sk 0
[  399.600000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 28683
global 2 0 0 wc __ tw 1023 sk 0
[  404.600000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 28683
global 2 0 0 wc __ tw 1024 sk 0

At this point the stall was definitely happening, as I then hit SysRq+W:
SysRq : Show Blocked State
  task                        PC stack   pid father
xfssyncd      D 0000000000000000     0  1040      2
 ffff810006177b60 0000000000000046 0000000000000000 0000007000000001
 0000000000000c31 0000000000000000 ffffffff80819b00 ffffffff80819b00
 ffffffff80815f40 ffffffff80819b00 ffff810006177b20 ffff810006177b10
Call Trace:
 [<ffffffff805b16a7>] __down+0xa7/0x11e
 [<ffffffff8022da70>] default_wake_function+0x0/0x10
 [<ffffffff805b1325>] __down_failed+0x35/0x3a
 [<ffffffff8037528e>] xfs_buf_lock+0x3e/0x40
 [<ffffffff803773ce>] _xfs_buf_find+0x13e/0x240
 [<ffffffff8037753f>] xfs_buf_get_flags+0x6f/0x190
 [<ffffffff80377672>] xfs_buf_read_flags+0x12/0xa0
 [<ffffffff803687e4>] xfs_trans_read_buf+0x64/0x340
 [<ffffffff80352321>] xfs_itobp+0x81/0x1e0
 [<ffffffff8022da70>] default_wake_function+0x0/0x10
 [<ffffffff80354cce>] xfs_iflush+0xfe/0x520
 [<ffffffff8036d48f>] xfs_finish_reclaim+0x15f/0x1c0
 [<ffffffff8036d5bb>] xfs_finish_reclaim_all+0xcb/0xf0
 [<ffffffff8036b608>] xfs_syncsub+0x68/0x300
 [<ffffffff8037cbe7>] xfs_sync_worker+0x17/0x40
 [<ffffffff8037cea2>] xfssyncd+0x142/0x1d0
 [<ffffffff8037cd60>] xfssyncd+0x0/0x1d0
 [<ffffffff8024a32b>] kthread+0x4b/0x80
 [<ffffffff8020c9d8>] child_rip+0xa/0x12
 [<ffffffff80219bd0>] lapic_next_event+0x0/0x10
 [<ffffffff8024a2e0>] kthread+0x0/0x80
 [<ffffffff8020c9ce>] child_rip+0x0/0x12

emerge        D ffff81010901b308     0  6130   6116
 ffff81000c5939e8 0000000000000086 0000000000000000 ffff81000614ff80
 ffff8101089dd7f0 ffffffff8022d61c ffffffff80819b00 ffffffff80819b00
 ffffffff80815f40 ffffffff80819b00 0000000000000086 ffffffff8022d7f3
Call Trace:
 [<ffffffff8022d61c>] task_rq_lock+0x4c/0x90
 [<ffffffff8022d7f3>] try_to_wake_up+0x63/0x2e0
 [<ffffffff805b16a7>] __down+0xa7/0x11e
 [<ffffffff8022da70>] default_wake_function+0x0/0x10
 [<ffffffff805b1325>] __down_failed+0x35/0x3a
 [<ffffffff8037528e>] xfs_buf_lock+0x3e/0x40
 [<ffffffff803773ce>] _xfs_buf_find+0x13e/0x240
 [<ffffffff8037753f>] xfs_buf_get_flags+0x6f/0x190
 [<ffffffff80377672>] xfs_buf_read_flags+0x12/0xa0
 [<ffffffff803687e4>] xfs_trans_read_buf+0x64/0x340
 [<ffffffff80352321>] xfs_itobp+0x81/0x1e0
 [<ffffffff80375bae>] xfs_buf_rele+0x2e/0xd0
 [<ffffffff80354cce>] xfs_iflush+0xfe/0x520
 [<ffffffff803ae592>] __down_read_trylock+0x42/0x60
 [<ffffffff80355c42>] xfs_inode_item_push+0x12/0x20
 [<ffffffff80368207>] xfs_trans_push_ail+0x267/0x2b0
 [<ffffffff8035c702>] xfs_log_reserve+0x72/0x120
 [<ffffffff80366bb8>] xfs_trans_reserve+0xa8/0x210
 [<ffffffff803525fb>] xfs_itruncate_finish+0xfb/0x310
 [<ffffffff80372364>] xfs_inactive+0x364/0x490
 [<ffffffff8037c834>] xfs_fs_clear_inode+0xa4/0xf0
 [<ffffffff802a8736>] clear_inode+0x66/0x150
 [<ffffffff802a899c>] generic_delete_inode+0x12c/0x140
 [<ffffffff8029e93d>] do_unlinkat+0x14d/0x1e0
 [<ffffffff8020bbbe>] system_call+0x7e/0x83

Next debug outputs:
[  410.310000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 28685
global 4 0 0 wc __ tw 1024 sk 0
[  414.600000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 28685
global 4 0 0 wc __ tw 1024 sk 0
[  419.620000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 28137
global 4 0 0 wc __ tw 1024 sk 0
[  424.630000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 25243
global 4 0 0 wc __ tw 1024 sk 0
[  429.630000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 25243
global 4 0 0 wc _M tw 1021 sk 0
[  429.640000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 25240
global 4 0 0 wc __ tw 1023 sk 0
[  434.720000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 25241
global 2 0 0 wc __ tw 1024 sk 0
[  439.720000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 25241
global 2 0 0 wc __ tw 1024 sk 0
[  444.720000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 25241
global 2 0 0 wc __ tw 1024 sk 0
[  449.720000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 25241
global 2 0 0 wc __ tw 1024 sk 0
[  455.840000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 25241
global 2 0 0 wc __ tw 1024 sk 0
[  459.720000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 25241
global 2 0 0 wc __ tw 1022 sk 0
[  464.720000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 25241
global 2 0 0 wc __ tw 1024 sk 0
[  469.720000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 25241
global 2 0 0 wc __ tw 1024 sk 0
[  475.040000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 22342
global 2 0 0 wc __ tw 1024 sk 0
[  480.060000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21772
global 2 0 0 wc __ tw 1024 sk 0
[  485.060000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21772
global 2 0 0 wc __ tw 1024 sk 0
[  490.060000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21772
global 2 0 0 wc __ tw 1022 sk 0
[  495.060000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21772
global 2 0 0 wc __ tw 1024 sk 0
[  500.060000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21774
global 3 0 0 wc __ tw 1024 sk 0
[  506.580000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21774
global 3 0 0 wc __ tw 1024 sk 0
[  510.760000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21774
global 3 0 0 wc __ tw 1024 sk 0
[  515.060000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21835
global 65 0 0 wc __ tw 1024 sk 0
[  520.060000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21835
global 65 0 0 wc __ tw 1024 sk 0
[  525.060000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21835
global 9 56 0 wc _M tw 961 sk 0
[  525.080000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21772
global 9 56 0 wc _M tw 1023 sk 0
[  525.100000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21771
global 9 56 0 wc _M tw 1023 sk 0
[  525.110000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21770
global 9 56 0 wc _M tw 1024 sk 0
[  525.150000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21770
global 9 56 0 wc _M tw 1024 sk 0
[  525.160000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21770
global 9 56 0 wc _M tw 1024 sk 0
[  525.170000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21770
global 9 56 0 wc _M tw 1023 sk 0
[  525.170000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21769
global 9 28 0 wc _M tw 1023 sk 0
[  525.190000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21768
global 9 28 0 wc _M tw 1024 sk 0
[  525.200000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21768
global 9 28 0 wc _M tw 1024 sk 0
[  525.210000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21768
global 9 28 0 wc _M tw 1024 sk 0
[  525.230000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 21768
global 9 28 0 wc __ tw 1023 sk 0
[  530.080000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 19499
global 2 0 0 wc __ tw 1024 sk 0
[  535.150000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 18676
global 2 0 0 wc __ tw 1024 sk 0
[  540.150000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 18676
global 2 0 0 wc __ tw 1024 sk 0
[  545.150000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 18676
global 2 0 0 wc __ tw 1024 sk 0
[  550.150000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 18676
global 2 0 0 wc __ tw 1024 sk 0
[  555.150000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 18676
global 2 0 0 wc __ tw 1024 sk 0
[  561.990000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 18676
global 1 0 0 wc __ tw 1022 sk 0
[  566.020000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 18676
global 2 0 0 wc __ tw 1024 sk 0
[  570.150000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 18676
global 2 0 0 wc __ tw 1024 sk 0
[  575.150000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 18676
global 2 0 0 wc __ tw 1024 sk 0
[  580.170000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 8244
global 3 0 0 wc __ tw 1024 sk 0
[  585.230000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 8695
global 8 0 0 wc __ tw 1024 sk 0
[  590.230000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10161
global 8 0 0 wc __ tw 1024 sk 0
[  595.230000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10161
global 8 0 0 wc _M tw 1020 sk 0
[  595.240000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10157
global 8 0 0 wc __ tw 1023 sk 0
[  600.230000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10159
global 6 0 0 wc __ tw 1024 sk 0
[  605.230000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10159
global 6 0 0 wc __ tw 1024 sk 0
[  610.230000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10159
global 6 0 0 wc __ tw 1024 sk 0
[  615.230000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10159
global 6 0 0 wc __ tw 1020 sk 0
[  620.290000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0
[  625.290000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1023 sk 0
[  630.290000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1023 sk 0
[  635.290000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0
[  640.290000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0
[  645.290000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0
[  650.350000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0
[  655.290000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10156
global 3 0 0 wc __ tw 1024 sk 0
[  660.290000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10156
global 3 0 0 wc _M tw 1023 sk 0
[  660.300000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 3 0 0 wc _M tw 1023 sk 0
[  660.310000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10154
global 3 1 0 wc _M tw 1024 sk 0
[  660.330000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10154
global 3 1 0 wc _M tw 1024 sk 0
[  660.350000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10154
global 3 1 0 wc _M tw 1024 sk 0
[  660.360000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10154
global 3 1 0 wc _M tw 1024 sk 0
[  660.370000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10154
global 3 1 0 wc _M tw 1024 sk 0
[  660.380000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10154
global 3 1 0 wc __ tw 1023 sk 0
[  665.320000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1023 sk 0
[  670.320000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0
[  675.320000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0
[  680.320000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0
[  685.320000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0
[  690.320000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0
[  695.320000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1023 sk 0
[  700.320000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1023 sk 0
[  705.320000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0
[  710.320000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0
[  715.320000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 10155
global 2 0 0 wc __ tw 1024 sk 0

I'm not sure, when emerge was finished here...

Second unmerge:
[ 1177.110000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 16604
global 2 0 0 wc __ tw 1023 sk 0
[ 1182.110000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 16604
global 2 0 0 wc __ tw 1024 sk 0
[ 1187.130000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 15310
global 2 0 0 wc __ tw 1024 sk 0
[ 1192.150000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 13335
global 2 0 0 wc __ tw 1024 sk 0
-> SysRq+W during one emerge stall
[ 1194.530000] SysRq : Show Blocked State
[ 1194.530000]   task                        PC stack   pid father
[ 1194.540000] xfssyncd      D ffff8101065798f8     0  1040      2
[ 1194.540000]  ffff810006177d28 0000000000000046 0000000000000000
ffff81010904ae80
[ 1194.550000]  ffff81010904ae80 0000000000000001 ffffffff80819b00
ffffffff80819b00
[ 1194.560000]  ffffffff80815f40 ffffffff80819b00 ffffffff8039d996
0000000000000000
[ 1194.570000] Call Trace:
[ 1194.570000]  [<ffffffff8039d996>] submit_bio+0x66/0xf0
[ 1194.570000]  [<ffffffff80375952>] _xfs_buf_ioapply+0x222/0x320
[ 1194.580000]  [<ffffffff805b16a7>] __down+0xa7/0x11e
[ 1194.590000]  [<ffffffff8022da70>] default_wake_function+0x0/0x10
[ 1194.590000]  [<ffffffff80376ad5>] xfs_buf_iostart+0x65/0x90
[ 1194.600000]  [<ffffffff805b1325>] __down_failed+0x35/0x3a
[ 1194.600000]  [<ffffffff8034f34b>] xfs_iflock+0x1b/0x20
[ 1194.600000]  [<ffffffff8036d4d0>] xfs_finish_reclaim+0x1a0/0x1c0
[ 1194.600000]  [<ffffffff8036d5bb>] xfs_finish_reclaim_all+0xcb/0xf0
[ 1194.600000]  [<ffffffff8036b608>] xfs_syncsub+0x68/0x300
[ 1194.600000]  [<ffffffff8037cbe7>] xfs_sync_worker+0x17/0x40
[ 1194.600000]  [<ffffffff8037cea2>] xfssyncd+0x142/0x1d0
[ 1194.600000]  [<ffffffff8037cd60>] xfssyncd+0x0/0x1d0
[ 1194.600000]  [<ffffffff8024a32b>] kthread+0x4b/0x80
[ 1194.600000]  [<ffffffff8020c9d8>] child_rip+0xa/0x12
[ 1194.600000]  [<ffffffff80219bd0>] lapic_next_event+0x0/0x10
[ 1194.600000]  [<ffffffff8024a2e0>] kthread+0x0/0x80
[ 1194.600000]  [<ffffffff8020c9ce>] child_rip+0x0/0x12
[ 1194.600000]
[ 1194.600000] emerge        D 0000000000000000     0  6742   6116
[ 1194.600000]  ffff81000cc4d9e8 0000000000000086 0000000000000000
0000007000000001
[ 1194.600000]  0000000000000818 ffffffff00000000 ffffffff80819b00
ffffffff80819b00
[ 1194.600000]  ffffffff80815f40 ffffffff80819b00 ffff81000cc4d9a8
ffff81000cc4d998
[ 1194.600000] Call Trace:
[ 1194.600000]  [<ffffffff805b16a7>] __down+0xa7/0x11e
[ 1194.600000]  [<ffffffff8022da70>] default_wake_function+0x0/0x10
[ 1194.600000]  [<ffffffff805b1325>] __down_failed+0x35/0x3a
[ 1194.600000]  [<ffffffff8037528e>] xfs_buf_lock+0x3e/0x40
[ 1194.600000]  [<ffffffff803773ce>] _xfs_buf_find+0x13e/0x240
[ 1194.600000]  [<ffffffff8037753f>] xfs_buf_get_flags+0x6f/0x190
[ 1194.600000]  [<ffffffff80377672>] xfs_buf_read_flags+0x12/0xa0
[ 1194.600000]  [<ffffffff803687e4>] xfs_trans_read_buf+0x64/0x340
[ 1194.600000]  [<ffffffff80352321>] xfs_itobp+0x81/0x1e0
[ 1194.600000]  [<ffffffff80375bae>] xfs_buf_rele+0x2e/0xd0
[ 1194.600000]  [<ffffffff80354cce>] xfs_iflush+0xfe/0x520
[ 1194.600000]  [<ffffffff803ae592>] __down_read_trylock+0x42/0x60
[ 1194.600000]  [<ffffffff80355c42>] xfs_inode_item_push+0x12/0x20
[ 1194.600000]  [<ffffffff80368207>] xfs_trans_push_ail+0x267/0x2b0
[ 1194.600000]  [<ffffffff8035c702>] xfs_log_reserve+0x72/0x120
[ 1194.600000]  [<ffffffff80366bb8>] xfs_trans_reserve+0xa8/0x210
[ 1194.600000]  [<ffffffff803525fb>] xfs_itruncate_finish+0xfb/0x310
[ 1194.600000]  [<ffffffff80372364>] xfs_inactive+0x364/0x490
[ 1194.600000]  [<ffffffff8037c834>] xfs_fs_clear_inode+0xa4/0xf0
[ 1194.600000]  [<ffffffff802a8736>] clear_inode+0x66/0x150
[ 1194.600000]  [<ffffffff802a899c>] generic_delete_inode+0x12c/0x140
[ 1194.600000]  [<ffffffff8029e93d>] do_unlinkat+0x14d/0x1e0
[ 1194.600000]  [<ffffffff8020bbbe>] system_call+0x7e/0x83
[ 1194.600000]
[ 1197.150000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 13337
global 4 0 0 wc __ tw 1024 sk 0
[ 1202.150000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 13337
global 4 0 0 wc __ tw 1024 sk 0
[ 1207.150000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 13337
global 4 0 0 wc _M tw 1021 sk 0
[ 1207.240000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 13334
global 4 0 0 wc _M tw 1023 sk 0
[ 1207.260000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 13333
global 4 0 0 wc __ tw 1023 sk 0
...
After emerge finished:
[ 1322.630000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11163
global 3 0 0 wc _M tw 1022 sk 0
[ 1322.650000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11161
global 3 0 0 wc __ tw 1023 sk 0
[ 1327.630000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11162
global 2 0 0 wc __ tw 1024 sk 0
[ 1332.630000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11162
global 2 0 0 wc __ tw 1024 sk 0
[ 1337.630000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11162
global 2 0 0 wc __ tw 1024 sk 0
[ 1342.630000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11162
global 2 0 0 wc __ tw 1024 sk 0
[ 1347.630000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11162
global 2 0 0 wc __ tw 1024 sk 0
-> After emerge finishes xfssyncd seems the only blocked process. Does
this process do the continuing writeout?
[ 1351.880000] SysRq : Show Blocked State
[ 1351.880000]   task                        PC stack   pid father
[ 1351.880000] xfssyncd      D ffff810104f0f6f8     0  1040      2
[ 1351.880000]  ffff810006177d28 0000000000000046 0000000000000000
ffff810101359380
[ 1351.880000]  ffff810101359380 0000000000000001 ffffffff80819b00
ffffffff80819b00
[ 1351.880000]  ffffffff80815f40 ffffffff80819b00 ffffffff8039d996
0000000000000000
[ 1351.880000] Call Trace:
[ 1351.880000]  [<ffffffff8039d996>] submit_bio+0x66/0xf0
[ 1351.880000]  [<ffffffff80375952>] _xfs_buf_ioapply+0x222/0x320
[ 1351.880000]  [<ffffffff805b16a7>] __down+0xa7/0x11e
[ 1351.880000]  [<ffffffff8022da70>] default_wake_function+0x0/0x10
[ 1351.880000]  [<ffffffff80376ad5>] xfs_buf_iostart+0x65/0x90
[ 1351.880000]  [<ffffffff805b1325>] __down_failed+0x35/0x3a
[ 1351.880000]  [<ffffffff8034f34b>] xfs_iflock+0x1b/0x20
[ 1351.880000]  [<ffffffff8036d4d0>] xfs_finish_reclaim+0x1a0/0x1c0
[ 1351.880000]  [<ffffffff8036d5bb>] xfs_finish_reclaim_all+0xcb/0xf0
[ 1351.880000]  [<ffffffff8036b608>] xfs_syncsub+0x68/0x300
[ 1351.880000]  [<ffffffff8037cbe7>] xfs_sync_worker+0x17/0x40
[ 1351.880000]  [<ffffffff8037cea2>] xfssyncd+0x142/0x1d0
[ 1351.880000]  [<ffffffff8037cd60>] xfssyncd+0x0/0x1d0
[ 1351.880000]  [<ffffffff8024a32b>] kthread+0x4b/0x80
[ 1351.880000]  [<ffffffff8020c9d8>] child_rip+0xa/0x12
[ 1351.880000]  [<ffffffff80219bd0>] lapic_next_event+0x0/0x10
[ 1351.880000]  [<ffffffff8024a2e0>] kthread+0x0/0x80
[ 1351.880000]  [<ffffffff8020c9ce>] child_rip+0x0/0x12
[ 1351.880000]
[ 1352.630000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11163
global 3 0 0 wc __ tw 1024 sk 0
[ 1357.630000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11216
global 3 0 0 wc _M tw 1022 sk 0
[ 1357.650000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11214
global 3 0 0 wc _M tw 1023 sk 0
[ 1357.670000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11213
global 1 3 0 wc _M tw 1024 sk 0
[ 1357.690000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11213
global 1 3 0 wc _M tw 1024 sk 0
[ 1357.700000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11213
global 1 3 0 wc __ tw 1023 sk 0
[ 1362.630000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11285
global 2 0 0 wc __ tw 1024 sk 0
[ 1367.630000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11289
global 2 0 0 wc __ tw 1024 sk 0
[ 1372.650000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11289
global 2 0 0 wc __ tw 1024 sk 0
-> Here I am trying SysRq+S to stop/finish the continued writeout of
8kB dirty data, but the disks were still working after that...
[ 1375.860000] SysRq : Emergency Sync
[ 1375.860000] mm/page-writeback.c 587 background_writeout:
pdflush(284) 0 global 2 0 0 wc __ tw 1022 sk 0
[ 1375.960000] Emergency Sync complete
[ 1377.650000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11288
global 1 0 0 wc __ tw 1024 sk 0
[ 1382.670000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11276
global 2 0 0 wc __ tw 1024 sk 0
[ 1387.670000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11276
global 2 0 0 wc __ tw 1024 sk 0
[ 1389.720000] mm/page-writeback.c 587 background_writeout:
pdflush(285) 0 global 2 0 0 wc __ tw 1022 sk 0
[ 1392.670000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11277
global 1 0 0 wc __ tw 1024 sk 0
[ 1397.670000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11278
global 2 0 0 wc __ tw 1024 sk 0
[ 1402.670000] mm/page-writeback.c 661 wb_kupdate: pdflush(285) 11278
global 2 0 0 wc __ tw 1024 sk 0

I also did a SysRq+T, but nothing interesting in it.
All processes sleeping in schedule_timeout and other timer stuff,
except emerge and xfssyncd in state D (similar calltrace to the
SysRq+W) and md1_raid5:
[  495.640000] md1_raid5     D 0000000000000000     0   946      2
[  495.640000]  ffff810006145d20 0000000000000046 0000000000000000
00000000000000
[  495.640000]  0000000000000010 ffffffff00000000 ffffffff80819b00
ffffffff80819b
[  495.640000]  ffffffff80815f40 ffffffff80819b00 ffff810006145ce0
ffff810006145c
[  495.640000] Call Trace:
[  495.640000]  [<ffffffff8039d996>] submit_bio+0x66/0xf0
[  495.640000]  [<ffffffff804c41e5>] md_super_wait+0xb5/0xd0
[  495.640000]  [<ffffffff8024a710>] autoremove_wake_function+0x0/0x30
[  495.640000]  [<ffffffff804ccb60>] bitmap_unplug+0x1b0/0x1c0
[  495.640000]  [<ffffffff804cab90>] md_thread+0x0/0x100
[  495.640000]  [<ffffffff804bf3d6>] raid5d+0xa6/0x490
[  495.640000]  [<ffffffff805b0197>] schedule_timeout+0x67/0xd0
[  495.640000]  [<ffffffff8023e740>] process_timeout+0x0/0x10
[  495.640000]  [<ffffffff805b018a>] schedule_timeout+0x5a/0xd0
[  495.640000]  [<ffffffff804cab90>] md_thread+0x0/0x100
[  495.640000]  [<ffffffff804cabc0>] md_thread+0x30/0x100
[  495.640000]  [<ffffffff8024a710>] autoremove_wake_function+0x0/0x30
[  495.640000]  [<ffffffff804cab90>] md_thread+0x0/0x100
[  495.640000]  [<ffffffff8024a32b>] kthread+0x4b/0x80
[  495.640000]  [<ffffffff8020c9d8>] child_rip+0xa/0x12
[  495.640000]  [<ffffffff8024a2e0>] kthread+0x0/0x80
[  495.640000]  [<ffffffff8020c9ce>] child_rip+0x0/0x12

The following processes were running:
events/3      R  running task        0    18      2
syslog-ng     R  running task        0  4616      1
X             R  running task        0  5814   5764

[snip]
> > I don't know. It's just that I have seen tmpfs also redirtieing inodes
> > in these logs and the stalling emerge is moving files from tmpfs to
> > xfs.
> > It could be, but I don't know enough about tmpfs internals to really be sure.
> > I just wanted to mention, that tmpfs is involved somehow.
>
> The requeue messages for tmpfs are not pleasant, but known to be fine ;-)

OK, didn't know that. But makes sense. Dirty tmpfs inodes do not sound
like a problem, but more like the normal case. ;-)

I will try the patch from Peter and see if this solves the
emerge/installing part and post logs from that...

Torsten

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-02 10:15                                       ` Peter Zijlstra
@ 2007-11-02 19:22                                         ` Torsten Kaiser
  2007-11-02 20:43                                           ` David Chinner
       [not found]                                           ` <394340668.31055@ustc.edu.cn>
       [not found]                                         ` <393999615.15343@ustc.edu.cn>
  1 sibling, 2 replies; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-02 19:22 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Fengguang Wu, Maxim Levitsky, linux-kernel, Andrew Morton,
	David Chinner, linux-fsdevel

On 11/2/07, Peter Zijlstra <peterz@infradead.org> wrote:
> On Fri, 2007-11-02 at 10:21 +0800, Fengguang Wu wrote:
>
> > Interestingly, no background_writeout() appears, but only
> > balance_dirty_pages() and wb_kupdate.  Obviously wb_kupdate won't
> > block the process.
>
> Yeah, the background threshold is not (yet) scaled. So it can happen
> that the bdi_dirty limit is below the background limit.

I still have not seen a trigger of the "throttle_vm_writeout".
This time, installing 2.6.24-rc1 again, it did not even trigger any other
debug messages apart from the one in wb_kupdate.
But 300Mb of new files might still not trigger this with 4Gb of RAM.

I'm currently testing 2.6.23-mm1 with this patch and the second
writeback-debug patch.

> I'm curious though as to these stalls, though, I can't seem to think of
> what goes wrong.. esp since most writeback seems to happen from pdflush.

I also don't know. But looking at the time the system takes to write
out 8kb, I'm starting to suspect that something is writing this out,
but not marking it clean... (Or redirtying it immediately?)

> (or I'm totally misreading it - quite a possible as I'm still recovering
> from a serious cold and not all the green stuff has yet figured out its
> proper place wrt brain cells 'n stuff)

Get well soon!

> I still have this patch floating around:
>
> ---
> Subject: mm: speed up writeback ramp-up on clean systems

applied, but did not fix the stalls.

Here the complete log from vmstat 10 and the syslog from an install of
vanilla 2.6.24-rc1.
(Please note: I installed the source of vanilla 2.6.24-rc1, but I am
still using 2.6.23-mm1!)
All lines with [note] are my comments about what the system was doing,
both logs are from the same run, so the notes should be more or less
in sync. I used SysRq+L to insert the SysRq-Helptext into the syslog
as marker...

The visible effects are similar to the unmerge run, but the stalls
during the moving only started later. But the same effect was visible
after emerge finished and almost all the dirty data was written:
I can still hear the disks and see the hdd light flickering
(mostly on) for much, much longer than it should take to write 8kb.

vmstat 10:
[note]emerge start
 1  0      0 3668496    332 187748    0    0     0    29   39  491  3  0 96  0
 1  0      0 3623940    332 188880    0    0    83    17 1724 3893 15  2 81  1
 0  0      0 3559488    332 252432    0    0  1021    48 11719 4536  9  4 74 13
 2  0      0 3482220    332 311916    0    0    70    60   93 3818 11  3 86  0
 1  0      0 3289352    332 486932    0    0     2    35   33 11997 25  3 72  0
 1  0      0 3174036    332 596412    0    0    10    33   35 3937 21  4 75  0
 2  0      0 3215756    332 555292    0    0     6    28   85  742 12 12 76  0
 2  0      0 3202128    332 559792    0    0    32     9   34 1566 31  1 68  0
 2  0      0 3192804    332 568072    0    0    60    46  172 4206 30  2 67  1
 3  0      0 3202424    332 572620    0    0     0    20  111 2223 27  1 72  0
 1  0      0 3196112    332 578900    0    0     0  1649  149 2763 25  2 73  0
 1  0      0 3190004    332 584956    0    0     0    17  110 2270 25  1 74  0
 1  0      0 3183952    332 590840    0    0     0    11  104 2553 25  1 74  0
 1  0      0 3176952    332 597068    0    0     0  2153  124 2886 25  2 72  0
 1  0      0 3171044    332 602592    0    0     0    22  109 2580 26  1 73  0
 1  0      0 3174896    332 605496    0    0   173  1441  312 2249  9  6 84  1
 1  0      0 3165204    332 611856    0    0   569  3221  606 4236  4  7 87  1
 0  0      0 3160856    332 613516    0    0   116  2281  570 3077  3  5 92  0
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 0  0      0 3154712    332 615200    0    0   108  2166  528 3038  3  4 93  0
 0  0      0 3156008    332 615420    0    0    18  1941  537 1015  0  2 97  0
 0  0      0 3156652    332 615504    0    0     8  2232  547  900  0  2 98  0
 0  0      0 3156748    332 615672    0    0    12  1932  537  947  0  2 98  0
 0  0      0 3154720    332 615900    0    0    14  2204  584 1256  1  2 97  0
 0  0      0 3154256    332 616060    0    0    10  2676  610 1317  1  3 96  0
 1  0      0 3152488    332 616284    0    0     9  1994  573 1024  1  2 97  0
 0  0      0 3152404    332 616408    0    0     4  2218  540  904  0  2 97  0
 0  0      0 3151244    332 617156    0    0    44  2198  598 1921  2  4 94  0
 0  0      0 3147224    332 618672    0    0   110  1802  644 2575  3  4 93  0
 0  0      0 3144608    332 619824    0    0    80  1590  543 1900  2  4 95  0
 0  0      0 3140768    332 621448    0    0   111  1758  657 2735  3  4 93  0
 0  0      0 3140816    332 621896    0    0    26   801  531 1667  1  2 98  0
[note] first stall, SysRq+W
 1  0      0 3127620    332 621896    0    0     0   640  490 1381  2  1 97  0
 0  0      0 3127780    332 621900    0    0     0   627  475 1531  2  1 98  0
 0  0      0 3127560    332 621900    0    0     0   587  464 1428  0  0 99  0
 1  0      0 3126272    332 622460    0    0    32   945  556 1922  1  2 97  0
[note] installing resumes
 0  0      0 3120860    332 624048    0    0    94  1950  785 2582  4  5 91  0
 0  0      0 3117392    332 625200    0    0    76  1258  742 2217  2  3 95  0
[note] second stall
 0  0      0 3118192    332 625200    0    0     0   617  559 1617  0  1 99  0
 0  0      0 3118836    332 625200    0    0     0   603  550 1576  5  1 94  0
 0  0      0 3118728    332 625200    0    0     0   682  601 1454  0  0 99  0
 0  0      0 3118860    332 625200    0    0     0   653  557 1382  0  1 99  0
[note] installing resumes
 1  0      0 3111356    332 624576    0    0    91  1277  789 2086 11  4 84  1
 0  0      0 3149768    332 627792    0    0   322   504  655 1444  1  2 96  1
 0  0      0 3150064    332 627792    0    0     0   559  623 1340  0  0 99  0
[note] emerge is finished, ~200Mb dirty data
 0  0      0 3150220    332 627792    0    0     0   622  553 1553  2  1 97  0
 0  0      0 3150456    332 627792    0    0     0   518  595 1315  0  1 99  0
 0  0      0 3149380    332 627792    0    0     0  3759  801 1277  0  3 97  0
 0  0      0 3148664    332 627840    0    0     0  3925  873 1500  0  4 96  0
 0  0      0 3149672    332 627868    0    0     0  2476  800 1355  0  3 97  0
 0  0      0 3148012    332 627872    0    0     0  2865  806 1235  0  3 97  0
 0  0      0 3150496    332 627936    0    0     0  3074  847 1288  0  3 97  0
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 0  0      0 3149568    332 627968    0    0     0  2238  751 1070  0  2 97  0
 0  0      0 3150260    332 627988    0    0     0   872  607 1073  0  1 99  0
 0  0      0 3150228    332 627988    0    0     0  1711  715 1214  0  2 98  0
 0  0      0 3149300    332 627988    0    0     0  2195  752 1042  0  2 98  0
 1  0      0 3150036    332 628032    0    0     0  2192  759 1118  0  2 97  0
 0  0      0 3150868    332 628032    0    0     0  1035  639 1138  0  1 99  0
 0  0      0 3150876    332 628068    0    0     0  1437  740 1153  0  1 98  0
 0  0      0 3151152    332 628068    0    0     0   446  545 1381  0  0 100  0
 0  0      0 3151212    332 628068    0    0     0   461  551 1412  2  0 98  0
[note] normal writeout finishes ~116kb dirty data left
 1  0      0 3151088    332 628068    0    0     0   472  552 1468  0  0 99  0
 0  0      0 3151260    332 628068    0    0     0   462  533 1369  0  0 100  0
 0  0      0 3151296    332 628068    0    0     0   464  559 1325  0  0 100  0
 0  0      0 3150992    332 628068    0    0     0   485  533 1350  0  0 100  0
 0  0      0 3151092    332 628068    0    0     0   492  543 1378  0  0 100  0
[note] hit SysRq+W and SysRq+M
 0  0      0 3150828    332 628076    0    0     0   430  541 1449  9  1 90  0
 0  0      0 3150932    332 628076    0    0     0   459  535 1401  0  0 100  0
 0  0      0 3151068    332 628076    0    0     0   465  536 1471  0  0 99  0
 0  0      0 3151164    332 628076    0    0     0   453  525 1349  0  0 100  0
 0  0      0 3151208    332 628076    0    0     0   474  530 1354  0  0 100  0
 1  0      0 3151036    332 628076    0    0     0   449  506 1348  0  0 100  0
 0  0      0 3151148    332 628076    0    0     0   476  520 1314  0  0 100  0
 0  0      0 3151080    332 628076    0    0     0   467  521 1373  0  0 100  0
 0  0      0 3151096    332 628076    0    0     0   464  521 1324  0  0 100  0
 0  0      0 3151220    332 628076    0    0     0   461  548 1360  0  0 100  0
 0  0      0 3151144    332 628076    0    0     0   417  480 1329  0  0 100  0
 0  0      0 3150892    332 628076    0    0     0   492  543 1363  0  0 99  0
 0  0      0 3151048    332 628076    0    0     0   436  515 1298  0  0 100  0
 0  0      0 3151076    332 628076    0    0     0   434  513 1402  0  0 100  0
 0  0      0 3151296    332 628076    0    0     0   430  508 1367  0  0 100  0
 0  0      0 3150940    332 628076    0    0     0   472  527 1331  0  0 100  0
 0  0      0 3151016    332 628076    0    0     0   472  527 1315  0  0 100  0
 0  0      0 3151024    332 628076    0    0     0   227  409  703  0  0 100  0
 0  0      0 3151272    332 628080    0    0     0    11  315  262  2  0 98  0
[note] writeout really finishes, disks go idle.

from syslog:
[note] emerge started, this unpacks the kernel into a tmpfs, patches
it to rc1, packs it into a tar.bz2 and then moves the files from the
tmpfs to my main xfs root fs
[  322.230000] SysRq : HELP : loglevel0-8 reBoot tErm Full kIll saK
showMem Nice powerOff showPc show-all-timers(Q) unRaw Sync showTasks
Unmount shoW-blocked-tasks
[  323.120000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 20090
global 25 0 0 wc __ tw 1024 sk 0
[  328.230000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 20091
global 26 0 0 wc __ tw 1024 sk 0
[  333.290000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 20131
global 29 0 0 wc _M tw 1023 sk 0
[  333.360000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 20130
global 29 0 0 wc _M tw 1023 sk 0
[  333.390000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 20129
global 29 0 0 wc __ tw 1023 sk 0
[  338.300000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 20131
global 28 0 0 wc __ tw 1024 sk 0
[  343.360000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 20196
global 1 28 0 wc __ tw 1000 sk 0
[  348.330000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 20188
global 4 0 0 wc __ tw 1024 sk 0
[  353.380000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 27417
global 4 0 0 wc __ tw 1024 sk 0
[  358.380000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 31801
global 4 0 0 wc __ tw 1024 sk 0
[  363.380000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 40783
global 4 0 0 wc __ tw 1021 sk 0
[  368.460000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 44080
global 1 0 0 wc __ tw 1023 sk 0
[  373.460000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 44085
global 1 0 0 wc __ tw 1024 sk 0
[  378.460000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 44631
global 1 0 0 wc __ tw 1024 sk 0
[  383.510000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 44709
global 1 0 0 wc __ tw 1024 sk 0
[note] around here the creation of the tar.bz2 started
[  388.520000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 45134
global 426 0 0 wc __ tw 1024 sk 0
[  393.530000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 45884
global 1148 0 0 wc __ tw 1024 sk 0
[  398.530000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47002
global 2262 0 0 wc __ tw 1023 sk 0
[  403.570000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47619
global 2888 0 0 wc __ tw 1024 sk 0
[  408.570000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 48276
global 3545 0 0 wc __ tw 1024 sk 0
[  413.570000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 48740
global 2997 1012 0 wc _M tw -1 sk 0
[  413.570000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47715
global 2997 1012 0 wc _M tw 1024 sk 0
[  413.580000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47715
global 1985 2024 0 wc _M tw -1 sk 0
[  413.590000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46690
global 973 3036 0 wc _M tw -1 sk 0
[  413.590000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 45665
global 7 4002 0 wc __ tw 64 sk 0
[  418.630000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 45595
global 864 0 0 wc __ tw 1024 sk 0
[  423.630000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46294
global 1563 0 0 wc __ tw 1024 sk 0
[  428.630000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47036
global 2305 0 0 wc __ tw 1023 sk 0
[  433.630000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47731
global 3000 0 0 wc __ tw 1024 sk 0
[  438.630000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 48525
global 3794 0 0 wc __ tw 1024 sk 0
[  443.630000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 49159
global 4428 0 0 wc __ tw 1024 sk 0
[note] around here the moving from the tmpfs to the xfs started
[  448.630000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 50047
global 4304 1012 0 wc _M tw -1 sk 0
[  448.640000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 49022
global 3292 2024 0 wc _M tw -1 sk 0
[  448.650000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47997
global 2234 3082 0 wc _M tw -1 sk 0
[  448.650000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46972
global 1222 4094 0 wc _M tw -1 sk 0
[  448.660000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 45947
global 210 5106 0 wc _M tw -1 sk 0
[  448.660000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 44922
global 0 5336 0 wc __ tw 812 sk 0
[  453.700000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 45385
global 654 0 0 wc __ tw 1024 sk 0
[  458.700000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 45881
global 1150 0 0 wc _M tw 1023 sk 0
[  458.790000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 45880
global 1196 0 0 wc _M tw 1023 sk 0
[  458.810000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 45879
global 1196 0 0 wc __ tw 1023 sk 0
[  463.840000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 44729
global 0 0 0 wc __ tw 1024 sk 0
[  468.860000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 45653
global 869 0 0 wc __ tw 1024 sk 0
[  473.880000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 51262
global 6380 0 0 wc __ tw 1024 sk 0
[  478.920000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 56488
global 11523 0 0 wc __ tw 1024 sk 0
[  485.260000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 58839
global 13842 0 0 wc __ tw 1024 sk 0
[  490.260000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 60796
global 15746 0 0 wc __ tw 1023 sk 0
[  495.270000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 64003
global 18907 0 0 wc __ tw 1023 sk 0
[  502.330000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 67524
global 21467 336 0 wc _M tw -5 sk 0
[  505.350000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 66495
global 20615 51 0 wc _M tw 0 sk 0
[  508.140000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 65471
global 19727 213 0 wc _M tw -1 sk 0
[  508.550000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 64446
global 19483 336 0 wc _M tw 760 sk 0
[  509.180000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 64182
global 19470 94 0 wc __ tw 1012 sk 0
[  514.190000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 65780
global 19665 172 0 wc __ tw -1 sk 0
[  517.310000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 64755
global 18827 14 0 wc __ tw -1 sk 0
[  520.100000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 63730
global 17929 96 0 wc _M tw -13 sk 0
[  522.560000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 62693
global 16937 167 0 wc _M tw -1 sk 0
[  527.050000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 61668
global 16021 95 0 wc _M tw -6 sk 0
[  530.460000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 60638
global 15115 52 0 wc _M tw -1 sk 0
[  534.470000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 59613
global 14222 27 0 wc _M tw -4 sk 0
[  537.760000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 58585
global 13386 54 0 wc _M tw 0 sk 0
[  541.050000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 57561
global 12737 58 0 wc _M tw 281 sk 0
[  541.090000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 56818
global 12737 58 0 wc __ tw 1022 sk 0
[  547.200000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 58858
global 12829 72 0 wc __ tw 0 sk 0
[  550.480000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 57834
global 12017 62 0 wc __ tw 0 sk 0
[  552.710000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 56810
global 11133 83 0 wc __ tw 0 sk 0
[  558.660000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 55786
global 10470 33 0 wc _M tw 0 sk 0
[  562.750000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 54762
global 10555 69 0 wc _M tw 0 sk 0
[  565.150000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 53738
global 9562 498 0 wc _M tw -2 sk 0
[  569.490000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 52712
global 8960 2 0 wc _M tw 0 sk 0
[  572.910000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 51688
global 8088 205 0 wc _M tw -13 sk 0
[  574.610000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 50651
global 7114 188 0 wc _M tw -1 sk 0
[  584.270000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 49626
global 14544 0 0 wc _M tw -1 sk 0
[  593.050000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 48601
global 24583 736 0 wc _M tw -1 sk 0
[  600.180000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47576
global 27004 6 0 wc _M tw 587 sk 0
[  600.180000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47139
global 27004 6 0 wc __ tw 1014 sk 0
[note] first stall, the output from emerge stops, so it seems it can
not start processing the next file until the stall ends
[  630.000000] SysRq : Emergency Sync
[  630.120000] Emergency Sync complete
[  632.850000] SysRq : Show Blocked State
[  632.850000]   task                        PC stack   pid father
[  632.850000] pdflush       D ffff81000f091788     0   285      2
[  632.850000]  ffff810005d4da80 0000000000000046 0000000000000800
0000007000000001
[  632.850000]  ffff81000fd52400 ffffffff8022d61c ffffffff80819b00
ffffffff80819b00
[  632.850000]  ffffffff80815f40 ffffffff80819b00 ffff810100316f98
0000000000000000
[  632.850000] Call Trace:
[  632.850000]  [<ffffffff8022d61c>] task_rq_lock+0x4c/0x90
[  632.850000]  [<ffffffff8022c8ea>] __wake_up_common+0x5a/0x90
[  632.850000]  [<ffffffff805b16e7>] __down+0xa7/0x11e
[  632.850000]  [<ffffffff8022da70>] default_wake_function+0x0/0x10
[  632.850000]  [<ffffffff805b1365>] __down_failed+0x35/0x3a
[  632.850000]  [<ffffffff803752ce>] xfs_buf_lock+0x3e/0x40
[  632.850000]  [<ffffffff8037740e>] _xfs_buf_find+0x13e/0x240
[  632.850000]  [<ffffffff8037757f>] xfs_buf_get_flags+0x6f/0x190
[  632.850000]  [<ffffffff803776b2>] xfs_buf_read_flags+0x12/0xa0
[  632.850000]  [<ffffffff80368824>] xfs_trans_read_buf+0x64/0x340
[  632.850000]  [<ffffffff80352361>] xfs_itobp+0x81/0x1e0
[  632.850000]  [<ffffffff8026b293>] write_cache_pages+0x123/0x330
[  632.850000]  [<ffffffff80354d0e>] xfs_iflush+0xfe/0x520
[  632.850000]  [<ffffffff803ae5d2>] __down_read_trylock+0x42/0x60
[  632.850000]  [<ffffffff8036ed49>] xfs_inode_flush+0x179/0x1b0
[  632.850000]  [<ffffffff8037ca8f>] xfs_fs_write_inode+0x2f/0x90
[  632.850000]  [<ffffffff802b3aac>] __writeback_single_inode+0x2ac/0x380
[  632.850000]  [<ffffffff804d074e>] dm_table_any_congested+0x2e/0x80
[  632.850000]  [<ffffffff802b3f9d>] generic_sync_sb_inodes+0x20d/0x330
[  632.850000]  [<ffffffff802b4532>] writeback_inodes+0xa2/0xe0
[  632.850000]  [<ffffffff8026bfd6>] wb_kupdate+0xa6/0x140
[  632.850000]  [<ffffffff8026c4b0>] pdflush+0x0/0x1e0
[  632.850000]  [<ffffffff8026c5c0>] pdflush+0x110/0x1e0
[  632.850000]  [<ffffffff8026bf30>] wb_kupdate+0x0/0x140
[  632.850000]  [<ffffffff8024a32b>] kthread+0x4b/0x80
[  632.850000]  [<ffffffff8020c9d8>] child_rip+0xa/0x12
[  632.850000]  [<ffffffff8024a2e0>] kthread+0x0/0x80
[  632.850000]  [<ffffffff8020c9ce>] child_rip+0x0/0x12
[  632.850000]
[  632.850000] emerge        D 0000000000000000     0  6220   6129
[  632.850000]  ffff810103ced9f8 0000000000000086 0000000000000000
0000007000000001
[  632.850000]  ffff81000fd52cf8 ffffffff00000000 ffffffff80819b00
ffffffff80819b00
[  632.850000]  ffffffff80815f40 ffffffff80819b00 ffff810103ced9b8
ffff810103ced9a8
[  632.850000] Call Trace:
[  632.850000]  [<ffffffff805b16e7>] __down+0xa7/0x11e
[  632.850000]  [<ffffffff8022da70>] default_wake_function+0x0/0x10
[  632.850000]  [<ffffffff805b1365>] __down_failed+0x35/0x3a
[  632.850000]  [<ffffffff803752ce>] xfs_buf_lock+0x3e/0x40
[  632.850000]  [<ffffffff8037740e>] _xfs_buf_find+0x13e/0x240
[  632.850000]  [<ffffffff8037757f>] xfs_buf_get_flags+0x6f/0x190
[  632.850000]  [<ffffffff803776b2>] xfs_buf_read_flags+0x12/0xa0
[  632.850000]  [<ffffffff80368824>] xfs_trans_read_buf+0x64/0x340
[  632.850000]  [<ffffffff80352361>] xfs_itobp+0x81/0x1e0
[  632.850000]  [<ffffffff80375bee>] xfs_buf_rele+0x2e/0xd0
[  632.850000]  [<ffffffff80354d0e>] xfs_iflush+0xfe/0x520
[  632.850000]  [<ffffffff803ae5d2>] __down_read_trylock+0x42/0x60
[  632.850000]  [<ffffffff80355c82>] xfs_inode_item_push+0x12/0x20
[  632.850000]  [<ffffffff80368247>] xfs_trans_push_ail+0x267/0x2b0
[  632.850000]  [<ffffffff8035c742>] xfs_log_reserve+0x72/0x120
[  632.850000]  [<ffffffff80366bf8>] xfs_trans_reserve+0xa8/0x210
[  632.850000]  [<ffffffff803731f2>] kmem_zone_zalloc+0x32/0x50
[  632.850000]  [<ffffffff8035263b>] xfs_itruncate_finish+0xfb/0x310
[  632.850000]  [<ffffffff8036daeb>] xfs_free_eofblocks+0x23b/0x280
[  632.850000]  [<ffffffff80371f93>] xfs_release+0x153/0x200
[  632.850000]  [<ffffffff80378010>] xfs_file_release+0x10/0x20
[  632.850000]  [<ffffffff80294251>] __fput+0xb1/0x220
[  632.850000]  [<ffffffff802910a4>] filp_close+0x54/0x90
[  632.850000]  [<ffffffff802929bf>] sys_close+0x9f/0x100
[  632.850000]  [<ffffffff8020bbbe>] system_call+0x7e/0x83
[  632.850000]
[  662.180000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 73045
global 39157 0 0 wc __ tw 0 sk 0
[note] emerge resumed
[  664.030000] SysRq : HELP : loglevel0-8 reBoot tErm Full kIll saK
showMem Nice powerOff showPc show-all-timers(Q) unRaw Sync showTasks
Unmount shoW-blocked-tasks
[  673.150000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 72021
global 44617 0 0 wc __ tw -3 sk 0
[note] emerge stalled again
[  693.930000] SysRq : HELP : loglevel0-8 reBoot tErm Full kIll saK
showMem Nice powerOff showPc show-all-timers(Q) unRaw Sync showTasks
Unmount shoW-blocked-tasks
[  724.580000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 70994
global 48064 26 0 wc _M tw -5 sk 0
[note] emerge resumed again
[  724.710000] SysRq : HELP : loglevel0-8 reBoot tErm Full kIll saK
showMem Nice powerOff showPc show-all-timers(Q) unRaw Sync showTasks
Unmount shoW-blocked-tasks
[  751.470000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 69965
global 47914 46 0 wc _M tw -1 sk 0
[note] emerge is finished, but 200Mb of dirty data remain
[  761.950000] SysRq : HELP : loglevel0-8 reBoot tErm Full kIll saK
showMem Nice powerOff showPc show-all-timers(Q) unRaw Sync showTasks
Unmount shoW-blocked-tasks
[  775.520000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 68940
global 46911 414 0 wc _M tw 0 sk 0
[  776.280000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 67916
global 45859 724 0 wc _M tw -2 sk 0
[  777.370000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 66890
global 44834 325 0 wc _M tw -10 sk 0
[  778.450000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 65856
global 43828 242 0 wc _M tw -1 sk 0
[  779.020000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 64831
global 42807 484 0 wc _M tw -1 sk 0
[  780.440000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 63806
global 41768 47 0 wc _M tw -7 sk 0
[  781.560000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 62775
global 40730 445 0 wc _M tw 0 sk 0
[  783.000000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 61751
global 39705 322 0 wc _M tw -3 sk 0
[  785.140000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 60724
global 38732 310 0 wc _M tw -4 sk 0
[  786.390000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 59696
global 37673 406 0 wc _M tw -6 sk 0
[  787.310000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 58666
global 36636 495 0 wc _M tw -9 sk 0
[  787.720000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 57633
global 35578 955 0 wc _M tw -1 sk 0
[  789.100000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 56608
global 34592 139 0 wc _M tw 0 sk 0
[  790.400000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 55584
global 33567 25 0 wc _M tw -3 sk 0
[  791.780000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 54557
global 32491 305 0 wc _M tw -11 sk 0
[  793.790000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 53522
global 31499 506 0 wc _M tw -5 sk 0
[  796.680000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 52493
global 30462 184 0 wc _M tw -3 sk 0
[  798.930000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 51466
global 29411 340 0 wc _M tw -11 sk 0
[  800.330000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 50431
global 28377 69 0 wc _M tw -4 sk 0
[  803.900000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 49403
global 27388 24 0 wc _M tw -2 sk 0
[  805.600000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 48377
global 26330 142 0 wc _M tw -6 sk 0
[  807.740000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47347
global 25295 138 0 wc _M tw -1 sk 0
[  809.680000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46322
global 24296 268 0 wc _M tw -2 sk 0
[  812.120000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 45296
global 23269 81 0 wc _M tw -5 sk 0
[  813.940000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 44267
global 22249 303 0 wc _M tw -1 sk 0
[  815.940000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 43242
global 21205 220 0 wc _M tw -9 sk 0
[  817.660000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 42209
global 20174 87 0 wc _M tw -7 sk 0
[  819.430000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 41178
global 19142 31 0 wc _M tw -5 sk 0
[  820.360000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 40149
global 18113 316 0 wc _M tw -7 sk 0
[  822.310000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 39118
global 17098 85 0 wc _M tw 0 sk 0
[  824.680000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 38094
global 16064 168 0 wc _M tw 0 sk 0
[  829.250000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 37070
global 15059 44 0 wc _M tw 0 sk 0
[  832.300000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 36046
global 14001 89 0 wc _M tw -2 sk 0
[  836.030000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 35020
global 13741 0 0 wc _M tw 760 sk 0
[  836.050000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 34756
global 13649 92 0 wc _M tw 922 sk 0
[  836.290000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 34654
global 13649 0 0 wc _M tw 1022 sk 0
[  836.720000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 34652
global 13650 0 0 wc __ tw 1023 sk 0
[  843.210000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 60278
global 12631 110 0 wc __ tw 0 sk 0
[  845.380000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 59254
global 11590 72 0 wc __ tw -1 sk 0
[  852.340000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 58229
global 10566 56 0 wc __ tw -1 sk 0
[  854.360000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 57204
global 9551 103 0 wc __ tw 0 sk 0
[  857.140000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 56180
global 8529 33 0 wc __ tw 0 sk 0
[  860.800000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 55156
global 7480 509 0 wc _M tw -9 sk 0
[  863.350000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 54123
global 6443 343 0 wc _M tw -10 sk 0
[  866.020000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 53089
global 5420 215 0 wc _M tw 0 sk 0
[  870.080000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 52065
global 4393 104 0 wc _M tw 0 sk 0
[  872.210000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 51041
global 3385 334 0 wc _M tw -5 sk 0
[  874.280000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 50012
global 2343 234 0 wc _M tw 0 sk 0
[  884.350000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 48988
global 1330 52 0 wc _M tw -4 sk 0
[  889.810000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47960
global 294 122 0 wc _M tw 0 sk 0
[note] the system is down to 116kb dirty data, but still writing back heavily
[  905.280000] SysRq : HELP : loglevel0-8 reBoot tErm Full kIll saK
showMem Nice powerOff showPc show-all-timers(Q) unRaw Sync showTasks
Unmount shoW-blocked-tasks
[note] after a while in this state I hit SysRq+W and SysRq+M to
capture more state
[  967.770000] SysRq : Show Blocked State
[  967.770000]   task                        PC stack   pid father
[  967.770000] pdflush       D ffff810080043640     0   285      2
[  967.770000]  ffff810005d4da80 0000000000000046 ffff810005d4da48
0000007000000001
[  967.770000]  0000000000000400 0000000000000001 ffffffff80819b00
ffffffff80819b00
[  967.770000]  ffffffff80815f40 ffffffff80819b00 ffff810005d4da40
ffff810005d4da30
[  967.770000] Call Trace:
[  967.770000]  [<ffffffff805b16e7>] __down+0xa7/0x11e
[  967.770000]  [<ffffffff8022da70>] default_wake_function+0x0/0x10
[  967.770000]  [<ffffffff805b1365>] __down_failed+0x35/0x3a
[  967.770000]  [<ffffffff803752ce>] xfs_buf_lock+0x3e/0x40
[  967.770000]  [<ffffffff8037740e>] _xfs_buf_find+0x13e/0x240
[  967.770000]  [<ffffffff8037757f>] xfs_buf_get_flags+0x6f/0x190
[  967.770000]  [<ffffffff803776b2>] xfs_buf_read_flags+0x12/0xa0
[  967.770000]  [<ffffffff80368824>] xfs_trans_read_buf+0x64/0x340
[  967.770000]  [<ffffffff80352361>] xfs_itobp+0x81/0x1e0
[  967.770000]  [<ffffffff8026b293>] write_cache_pages+0x123/0x330
[  967.770000]  [<ffffffff80354d0e>] xfs_iflush+0xfe/0x520
[  967.770000]  [<ffffffff803ae5d2>] __down_read_trylock+0x42/0x60
[  967.770000]  [<ffffffff8036ed49>] xfs_inode_flush+0x179/0x1b0
[  967.770000]  [<ffffffff8037ca8f>] xfs_fs_write_inode+0x2f/0x90
[  967.770000]  [<ffffffff802b3aac>] __writeback_single_inode+0x2ac/0x380
[  967.770000]  [<ffffffff804d074e>] dm_table_any_congested+0x2e/0x80
[  967.770000]  [<ffffffff802b3f9d>] generic_sync_sb_inodes+0x20d/0x330
[  967.770000]  [<ffffffff802b4532>] writeback_inodes+0xa2/0xe0
[  967.770000]  [<ffffffff8026bfd6>] wb_kupdate+0xa6/0x140
[  967.770000]  [<ffffffff8026c4b0>] pdflush+0x0/0x1e0
[  967.770000]  [<ffffffff8026c5c0>] pdflush+0x110/0x1e0
[  967.770000]  [<ffffffff8026bf30>] wb_kupdate+0x0/0x140
[  967.770000]  [<ffffffff8024a32b>] kthread+0x4b/0x80
[  967.770000]  [<ffffffff8020c9d8>] child_rip+0xa/0x12
[  967.770000]  [<ffffffff8024a2e0>] kthread+0x0/0x80
[  967.770000]  [<ffffffff8020c9ce>] child_rip+0x0/0x12
[  967.770000]
[  968.640000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46936
global 30 0 0 wc _M tw 757 sk 0
[  968.670000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46669
global 2 28 0 wc __ tw 996 sk 0
[  970.520000] SysRq : Show Memory
[  970.530000] Mem-info:
[  970.530000] Node 0 DMA per-cpu:
[  970.530000] CPU    0: Hot: hi:    0, btch:   1 usd:   0   Cold: hi:
   0, btch:   1 usd:   0
[  970.540000] CPU    1: Hot: hi:    0, btch:   1 usd:   0   Cold: hi:
   0, btch:   1 usd:   0
[  970.540000] CPU    2: Hot: hi:    0, btch:   1 usd:   0   Cold: hi:
   0, btch:   1 usd:   0
[  970.540000] CPU    3: Hot: hi:    0, btch:   1 usd:   0   Cold: hi:
   0, btch:   1 usd:   0
[  970.540000] Node 0 DMA32 per-cpu:
[  970.540000] CPU    0: Hot: hi:  186, btch:  31 usd:  66   Cold: hi:
  62, btch:  15 usd:  15
[  970.540000] CPU    1: Hot: hi:  186, btch:  31 usd: 159   Cold: hi:
  62, btch:  15 usd:  17
[  970.540000] CPU    2: Hot: hi:  186, btch:  31 usd:   0   Cold: hi:
  62, btch:  15 usd:   0
[  970.540000] CPU    3: Hot: hi:  186, btch:  31 usd:   0   Cold: hi:
  62, btch:  15 usd:   0
[  970.540000] Node 1 DMA32 per-cpu:
[  970.540000] CPU    0: Hot: hi:  186, btch:  31 usd:  28   Cold: hi:
  62, btch:  15 usd:   0
[  970.540000] CPU    1: Hot: hi:  186, btch:  31 usd:  47   Cold: hi:
  62, btch:  15 usd:   0
[  970.540000] CPU    2: Hot: hi:  186, btch:  31 usd: 155   Cold: hi:
  62, btch:  15 usd:  12
[  970.540000] CPU    3: Hot: hi:  186, btch:  31 usd: 183   Cold: hi:
  62, btch:  15 usd:   3
[  970.540000] Node 1 Normal per-cpu:
[  970.540000] CPU    0: Hot: hi:  186, btch:  31 usd:   0   Cold: hi:
  62, btch:  15 usd:   0
[  970.540000] CPU    1: Hot: hi:  186, btch:  31 usd:   0   Cold: hi:
  62, btch:  15 usd:   0
[  970.540000] CPU    2: Hot: hi:  186, btch:  31 usd: 118   Cold: hi:
  62, btch:  15 usd:  19
[  970.540000] CPU    3: Hot: hi:  186, btch:  31 usd: 163   Cold: hi:
  62, btch:  15 usd:  13
[note] I do think, that /proc/meminfo also showed only 8kb of dirty
remaining at this point, but I'm not 200% sure...
[  970.540000] Active:70883 inactive:117017 dirty:2 writeback:0 unstable:0
[  970.540000]  free:787733 slab:25286 mapped:12000 pagetables:2237 bounce:0
[  970.540000] Node 0 DMA free:9448kB min:16kB low:20kB high:24kB
active:0kB inactive:0kB present:8868kB pages_scanned:0
all_unreclaimable? no
[  970.540000] lowmem_reserve[]: 0 2004 2004 2004
[  970.540000] Node 0 DMA32 free:1465640kB min:4040kB low:5048kB
high:6060kB active:132340kB inactive:310048kB present:2052320kB
pages_scanned:0 all_unreclaimable? no
[  970.540000] lowmem_reserve[]: 0 0 0 0
[  970.540000] Node 1 DMA32 free:1476216kB min:3040kB low:3800kB
high:4560kB active:3528kB inactive:41952kB present:1544000kB
pages_scanned:0 all_unreclaimable? no
[  970.540000] lowmem_reserve[]: 0 0 505 505
[  970.540000] Node 1 Normal free:199628kB min:1016kB low:1268kB
high:1524kB active:147664kB inactive:116068kB present:517120kB
pages_scanned:0 all_unreclaimable? no
[  970.540000] lowmem_reserve[]: 0 0 0 0
[  970.540000] Node 0 DMA: 6*4kB 6*8kB 4*16kB 5*32kB 3*64kB 2*128kB
4*256kB 1*512kB 1*1024kB 1*2048kB 1*4096kB = 9448kB
[  970.540000] Node 0 DMA32: 158*4kB 66*8kB 30*16kB 22*32kB 10*64kB
7*128kB 6*256kB 4*512kB 6*1024kB 5*2048kB 352*4096kB = 1465640kB
[  970.540000] Node 1 DMA32: 866*4kB 446*8kB 228*16kB 122*32kB 50*64kB
32*128kB 23*256kB 17*512kB 16*1024kB 11*2048kB 342*4096kB = 1476216kB
[  970.540000] Node 1 Normal: 511*4kB 618*8kB 471*16kB 325*32kB
185*64kB 92*128kB 72*256kB 55*512kB 38*1024kB 26*2048kB 3*4096kB =
199580kB
[  970.540000] Swap cache: add 0, delete 0, find 0/0, race 0+0
[  970.540000] Free swap  = 9775416kB
[  970.540000] Total swap = 9775416kB
[  970.540000] Free swap:       9775416kB
[  970.570000] 1048576 pages of RAM
[  970.570000] 35174 reserved pages
[  970.570000] 149150 pages shared
[  970.570000] 0 pages swap cached
[ 1137.110000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46642
global 1 0 0 wc _M tw 1022 sk 0
[ 1137.110000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46640
global 1 0 0 wc __ tw 1022 sk 0
[ 1138.110000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46640
global 1 0 0 wc __ tw 1024 sk 0
[ 1143.110000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46640
global 1 0 0 wc __ tw 1024 sk 0
[ 1148.110000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46640
global 1 0 0 wc __ tw 1024 sk 0
[note] finally the disks go idle
[ 1149.020000] SysRq : HELP : loglevel0-8 reBoot tErm Full kIll saK
showMem Nice powerOff showPc show-all-timers(Q) unRaw Sync showTasks
Unmount shoW-blocked-tasks
[ 1153.110000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46641
global 2 0 0 wc __ tw 1024 sk 0
[ 1158.110000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46641
global 2 0 0 wc __ tw 1024 sk 0
[ 1163.110000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46641
global 2 0 0 wc __ tw 1024 sk 0
[ 1168.110000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46641
global 2 0 0 wc _M tw 1023 sk 0
[ 1168.160000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46640
global 2 0 0 wc _M tw 1023 sk 0
[ 1168.180000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46639
global 2 0 0 wc __ tw 1023 sk 0
[ 1173.110000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46640
global 1 0 0 wc __ tw 1024 sk 0
[ 1178.110000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46640
global 1 0 0 wc __ tw 1024 sk 0
[ 1183.110000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 46640
global 1 0 0 wc __ tw 1024 sk 0

Torsten

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-02 19:22                                         ` Torsten Kaiser
@ 2007-11-02 20:43                                           ` David Chinner
  2007-11-02 21:02                                             ` Torsten Kaiser
  2007-11-04 11:19                                             ` Torsten Kaiser
       [not found]                                           ` <394340668.31055@ustc.edu.cn>
  1 sibling, 2 replies; 61+ messages in thread
From: David Chinner @ 2007-11-02 20:43 UTC (permalink / raw)
  To: Torsten Kaiser
  Cc: Peter Zijlstra, Fengguang Wu, Maxim Levitsky, linux-kernel,
	Andrew Morton, David Chinner, linux-fsdevel, xfs

On Fri, Nov 02, 2007 at 08:22:10PM +0100, Torsten Kaiser wrote:
> [  630.000000] SysRq : Emergency Sync
> [  630.120000] Emergency Sync complete
> [  632.850000] SysRq : Show Blocked State
> [  632.850000]   task                        PC stack   pid father
> [  632.850000] pdflush       D ffff81000f091788     0   285      2
> [  632.850000]  ffff810005d4da80 0000000000000046 0000000000000800
> 0000007000000001
> [  632.850000]  ffff81000fd52400 ffffffff8022d61c ffffffff80819b00
> ffffffff80819b00
> [  632.850000]  ffffffff80815f40 ffffffff80819b00 ffff810100316f98
> 0000000000000000
> [  632.850000] Call Trace:
> [  632.850000]  [<ffffffff8022d61c>] task_rq_lock+0x4c/0x90
> [  632.850000]  [<ffffffff8022c8ea>] __wake_up_common+0x5a/0x90
> [  632.850000]  [<ffffffff805b16e7>] __down+0xa7/0x11e
> [  632.850000]  [<ffffffff8022da70>] default_wake_function+0x0/0x10
> [  632.850000]  [<ffffffff805b1365>] __down_failed+0x35/0x3a
> [  632.850000]  [<ffffffff803752ce>] xfs_buf_lock+0x3e/0x40
> [  632.850000]  [<ffffffff8037740e>] _xfs_buf_find+0x13e/0x240
> [  632.850000]  [<ffffffff8037757f>] xfs_buf_get_flags+0x6f/0x190
> [  632.850000]  [<ffffffff803776b2>] xfs_buf_read_flags+0x12/0xa0
> [  632.850000]  [<ffffffff80368824>] xfs_trans_read_buf+0x64/0x340
> [  632.850000]  [<ffffffff80352361>] xfs_itobp+0x81/0x1e0
> [  632.850000]  [<ffffffff8026b293>] write_cache_pages+0x123/0x330
> [  632.850000]  [<ffffffff80354d0e>] xfs_iflush+0xfe/0x520

That's stalled waiting on the inode cluster buffer lock. That implies
that the inode cluster is already being written out and the inode has
been redirtied during writeout.

Does the kernel you are testing have the "flush inodes in ascending
inode number order" patches applied? If so, can you remove that
patch and see if the problem goes away?

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-02 20:43                                           ` David Chinner
@ 2007-11-02 21:02                                             ` Torsten Kaiser
  2007-11-04 11:19                                             ` Torsten Kaiser
  1 sibling, 0 replies; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-02 21:02 UTC (permalink / raw)
  To: David Chinner
  Cc: Peter Zijlstra, Fengguang Wu, Maxim Levitsky, linux-kernel,
	Andrew Morton, linux-fsdevel, xfs

On 11/2/07, David Chinner <dgc@sgi.com> wrote:
> On Fri, Nov 02, 2007 at 08:22:10PM +0100, Torsten Kaiser wrote:
> > [  630.000000] SysRq : Emergency Sync
> > [  630.120000] Emergency Sync complete
> > [  632.850000] SysRq : Show Blocked State
> > [  632.850000]   task                        PC stack   pid father
> > [  632.850000] pdflush       D ffff81000f091788     0   285      2
> > [  632.850000]  ffff810005d4da80 0000000000000046 0000000000000800
> > 0000007000000001
> > [  632.850000]  ffff81000fd52400 ffffffff8022d61c ffffffff80819b00
> > ffffffff80819b00
> > [  632.850000]  ffffffff80815f40 ffffffff80819b00 ffff810100316f98
> > 0000000000000000
> > [  632.850000] Call Trace:
> > [  632.850000]  [<ffffffff8022d61c>] task_rq_lock+0x4c/0x90
> > [  632.850000]  [<ffffffff8022c8ea>] __wake_up_common+0x5a/0x90
> > [  632.850000]  [<ffffffff805b16e7>] __down+0xa7/0x11e
> > [  632.850000]  [<ffffffff8022da70>] default_wake_function+0x0/0x10
> > [  632.850000]  [<ffffffff805b1365>] __down_failed+0x35/0x3a
> > [  632.850000]  [<ffffffff803752ce>] xfs_buf_lock+0x3e/0x40
> > [  632.850000]  [<ffffffff8037740e>] _xfs_buf_find+0x13e/0x240
> > [  632.850000]  [<ffffffff8037757f>] xfs_buf_get_flags+0x6f/0x190
> > [  632.850000]  [<ffffffff803776b2>] xfs_buf_read_flags+0x12/0xa0
> > [  632.850000]  [<ffffffff80368824>] xfs_trans_read_buf+0x64/0x340
> > [  632.850000]  [<ffffffff80352361>] xfs_itobp+0x81/0x1e0
> > [  632.850000]  [<ffffffff8026b293>] write_cache_pages+0x123/0x330
> > [  632.850000]  [<ffffffff80354d0e>] xfs_iflush+0xfe/0x520
>
> That's stalled waiting on the inode cluster buffer lock. That implies
> that the inode cluster is already being written out and the inode has
> been redirtied during writeout.
>
> Does the kernel you are testing have the "flush inodes in ascending
> inode number order" patches applied? If so, can you remove that
> patch and see if the problem goes away?

It's 2.6.23-mm1 with only some small fixes.

In its broken-out directory I see:
git-xfs.patch

and

writeback-fix-periodic-superblock-dirty-inode-flushing.patch
writeback-fix-time-ordering-of-the-per-superblock-dirty-inode-lists-2.patch
writeback-fix-time-ordering-of-the-per-superblock-dirty-inode-lists-3.patch
writeback-fix-time-ordering-of-the-per-superblock-dirty-inode-lists-4.patch
writeback-fix-time-ordering-of-the-per-superblock-dirty-inode-lists-5.patch
writeback-fix-time-ordering-of-the-per-superblock-dirty-inode-lists-6.patch
writeback-fix-time-ordering-of-the-per-superblock-dirty-inode-lists-7.patch
writeback-fix-time-ordering-of-the-per-superblock-dirty-inode-lists.patch
writeback-fix-time-ordering-of-the-per-superblock-inode-lists-8.patch
writeback-introduce-writeback_controlmore_io-to-indicate-more-io.patch

I don't know if the patch you mentioned is part of that version of the
mm-patchset.

Torsten

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-02 20:43                                           ` David Chinner
  2007-11-02 21:02                                             ` Torsten Kaiser
@ 2007-11-04 11:19                                             ` Torsten Kaiser
  2007-11-05  1:45                                               ` David Chinner
  1 sibling, 1 reply; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-04 11:19 UTC (permalink / raw)
  To: David Chinner
  Cc: Peter Zijlstra, Fengguang Wu, Maxim Levitsky, linux-kernel,
	Andrew Morton, linux-fsdevel, xfs

[-- Attachment #1: Type: text/plain, Size: 7465 bytes --]

On 11/2/07, David Chinner <dgc@sgi.com> wrote:
> That's stalled waiting on the inode cluster buffer lock. That implies
> that the inode cluster is already being written out and the inode has
> been redirtied during writeout.
>
> Does the kernel you are testing have the "flush inodes in ascending
> inode number order" patches applied? If so, can you remove that
> patch and see if the problem goes away?

I can now confirm that I also see this with the current mainline git version.
I used 2.6.24-rc1-git-b4f555081fdd27d13e6ff39d455d5aefae9d2c0c
plus the fix for the sg changes in ieee1394.
Bisecting would be troublesome, as the sg changes prevent mainline from
booting with my normal config / kill my network.

treogen ~ # vmstat 10
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
-> starting emerge
 1  0      0 3627072    332 157724    0    0    97    13   41  189  2  2 94  2
 0  0      0 3607240    332 163736    0    0   599    10  332  951  2  1 93  4
 0  0      0 3601920    332 167592    0    0   380     2  218  870  1  1 98  0
 0  0      0 3596356    332 171648    0    0   404    21  182  818  0  0 99  0
 0  0      0 3579328    332 180436    0    0   878    12  147  912  1  1 97  2
 0  0      0 3575376    332 182776    0    0   236     4  244  953  1  1 95  3
 2  1      0 3571792    332 185084    0    0   232     7  256 1003  2  1 95  2
 0  0      0 3564844    332 187364    0    0   228   605  246 1167  2  1 93  4
 0  0      0 3562128    332 189784    0    0   230     4  527 1238  2  1 93  4
 0  1      0 3558764    332 191964    0    0   216    24  438 1059  1  1 93  6
 0  0      0 3555120    332 193868    0    0   199    36  406  959  0  0 92  8
 0  0      0 3552008    332 195928    0    0   197    11  458 1023  1  1 90  8
 0  0      0 3548728    332 197660    0    0   183     7  496 1086  1  1 90  8
 0  0      0 3545560    332 199372    0    0   170     8  483 1017  1  1 90  9
 0  1      0 3542124    332 201256    0    0   190     1  544 1137  1  1 88 10
 1  0      0 3536924    332 203296    0    0   195     7  637 1209  2  1 89  8
 1  1      0 3485096    332 249184    0    0   101    16 10372 4537 13  3 76  8
 2  0      0 3442004    332 279728    0    0  1086    40  219 1349  7  3 87  4
-> emerge is done reading its package database
 1  0      0 3254796    332 448636    0    0     0    27  128 8360 24  6 70  0
 2  0      0 3143304    332 554016    0    0    47    33  213 4480 16 11 72  1
-> kernel unpacked
 1  0      0 3125700    332 560416    0    0     1    20  122 1675 24  1 75  0
 1  0      0 3117356    332 567968    0    0     0   674  157 2975 24  2 73  1
 2  0      0 3111636    332 573736    0    0     0  1143  151 1924 23  1 75  1
 2  0      0 3102836    332 581332    0    0     0   890  153 1330 24  1 75  0
 1  0      0 3097236    332 587360    0    0     0   656  194 1593 24  1 74  0
 1  0      0 3086824    332 595480    0    0     0   812  235 2657 25  1 74  0
-> tar.bz2 created, installing starts now
 0  0      0 3091612    332 601024    0    0    82   708  499 2397 17  4 78  1
 0  0      0 3086088    332 602180    0    0    69  2459  769 2237  3  4 88  6
 0  0      0 3085916    332 602236    0    0     2  1752  693  949  1  2 96  1
 0  0      0 3084544    332 603564    0    0    66  4057 1176 2850  3  6 91  0
 0  0      0 3078780    332 605572    0    0    98  3194 1169 3288  5  6 89  0
 0  0      0 3077940    332 605924    0    0    17  1139  823 1547  1  2 97  0
 0  0      0 3078268    332 605924    0    0     0   888  807 1329  0  1 99  0
-> first short stall
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 0  0      0 3077040    332 605924    0    0     0  1950  785 1495  0  2 89  8
 0  0      0 3076588    332 605896    0    0     2  3807  925 2046  1  4 95  0
 0  0      0 3076900    332 606052    0    0    11  2564  768 1471  1  3 95  1
 0  0      0 3071584    332 607928    0    0    87  2499 1108 3433  4  6 90  0
-> second longer stall
(emerge was not able to complete a single filemove until the 'resume' line)
 0  0      0 3071592    332 607928    0    0     0   693  692 1289  0  0 99  0
 0  0      0 3072584    332 607928    0    0     0   792  731 1507  0  1 99  0
 0  0      0 3072840    332 607928    0    0     0   806  707 1521  0  1 99  0
 0  0      0 3072724    332 607928    0    0     0   782  695 1372  0  0 99  0
 0  0      0 3072972    332 607928    0    0     0   677  612 1301  0  0 99  0
 0  0      0 3072772    332 607928    0    0     0   738  681 1352  1  1 99  0
 0  0      0 3073020    332 607928    0    0     0   785  708 1328  0  1 99  0
 0  0      0 3072896    332 607928    0    0     0   833  722 1383  0  0 99  0
-> emerge resumed
 0  0      0 3069476    332 607972    0    0     2  4885  812 2062  1  4 90  5
 1  0      0 3069648    332 608068    0    0     4  4658  833 2158  1  4 93  2
 0  0      0 3064972    332 610364    0    0   106  2494 1095 3620  5  7 88  0
 0  0      0 3057536    332 612444    0    0    86  2023 1012 3440  4  6 90  0
 1  0      0 3054572    332 612368    0    0   102  1526 1024 2277  6  5 87  2
-> emerge finished, but still >100Mb of dirty data according to /proc/meminfo
 0  0      0 3048548    332 615764    0    0   337   659  796 1000  3  1 96  0
 0  0      0 3092100    332 615860    0    0    15   616  606 1040  1  0 99  0
 0  0      0 3092148    332 615860    0    0     0   641  622 1085  0  0 99  0
 0  0      0 3092528    332 615860    0    0     0   766  654 1055  1  1 99  0
-> slow writeout until here, might be fixed with Peter's patch to scale
the background threshold
 2  0      0 3090828    332 615860    0    0     0  1804  707 1215  0  2 98  0
 0  0      0 3091056    332 615864    0    0     0  3877  831 2047  1  4 94  1
 3  0      0 3090780    332 615864    0    0     0  2048  784 1154  1  2 97  1
 0  0      0 3091096    332 615864    0    0     0  2690  751 1538  0  3 96  1
 0  1      0 3091056    332 615864    0    0     0  2018  748  866  0  2 95  2
 2  0      0 3092960    332 615864    0    0     0  2076  719 1118  0  2 97  0
-> writeout "done", /proc/meminfo showed 0kb of dirty data remaining
 0  0      0 3093072    332 615864    0    0     0   645  646 1104  0  0 99  0
 0  0      0 3093532    332 615864    0    0     0   726  658 1223  0  1 99  0
 0  0      0 3093540    332 615864    0    0     0   801  699 1314  0  1 99  0
 0  0      0 3093580    332 615864    0    0     0   783  738 1350  0  1 99  0
 0  0      0 3093284    332 615920    0    0     6   746  655 1381  1  1 98  0
 0  0      0 3092872    332 615920    0    0     0   862  703 1391  1  1 98  0
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 0  0      0 3093224    332 615920    0    0     0   799  676 1394  0  0 99  0
 0  0      0 3093304    332 615920    0    0     0   835  672 1514  1  1 98  0
 0  0      0 3093476    332 615920    0    0     0   784  641 1404  1  1 98  0
 0  0      0 3093264    332 615920    0    0     0   722  626 1483  1  1 99  0
 0  0      0 3093476    332 615920    0    0     0     7  328  350  0  0 99  0
 0  0      0 3093628    332 615920    0    0     0    11  332  407  0  0 99  0
-> disks finally go idle

Torsten

.config for 2.6.24-rc1+git attached

[-- Attachment #2: config.gz --]
[-- Type: application/x-gzip, Size: 11732 bytes --]

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-04 11:19                                             ` Torsten Kaiser
@ 2007-11-05  1:45                                               ` David Chinner
  2007-11-05  7:01                                                 ` Torsten Kaiser
  2007-11-05 18:27                                                 ` Torsten Kaiser
  0 siblings, 2 replies; 61+ messages in thread
From: David Chinner @ 2007-11-05  1:45 UTC (permalink / raw)
  To: Torsten Kaiser
  Cc: David Chinner, Peter Zijlstra, Fengguang Wu, Maxim Levitsky,
	linux-kernel, Andrew Morton, linux-fsdevel, xfs

[-- Attachment #1: Type: text/plain, Size: 1083 bytes --]

On Sun, Nov 04, 2007 at 12:19:19PM +0100, Torsten Kaiser wrote:
> On 11/2/07, David Chinner <dgc@sgi.com> wrote:
> > That's stalled waiting on the inode cluster buffer lock. That implies
> > that the inode cluster is already being written out and the inode has
> > been redirtied during writeout.
> >
> > Does the kernel you are testing have the "flush inodes in ascending
> > inode number order" patches applied? If so, can you remove that
> > patch and see if the problem goes away?
> 
> I can now confirm, that I see this also with the current mainline-git-version
> I used 2.6.24-rc1-git-b4f555081fdd27d13e6ff39d455d5aefae9d2c0c
> plus the fix for the sg changes in ieee1394.

Ok, so it's probably a side effect of the writeback changes.

Attached are two patches (two because one was in a separate patchset as
a standalone change) that should prevent async writeback from blocking
on locked inode cluster buffers. Apply the xfs-factor-inotobp patch first.
Can you see if this fixes the problem?

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group



[-- Attachment #2: xfs-factor-inotobp --]
[-- Type: text/plain, Size: 9595 bytes --]

---
 fs/xfs/xfs_inode.c |  283 ++++++++++++++++++++++++-----------------------------
 1 file changed, 129 insertions(+), 154 deletions(-)

Index: 2.6.x-xfs-new/fs/xfs/xfs_inode.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_inode.c	2007-09-12 15:41:22.000000000 +1000
+++ 2.6.x-xfs-new/fs/xfs/xfs_inode.c	2007-09-13 08:57:06.395641940 +1000
@@ -124,6 +124,126 @@ xfs_inobp_check(
 #endif
 
 /*
+ * Wrapper around xfs_imap(): logs a warning if the lookup fails and
+ * rejects mappings that extend past the end of the filesystem.
+ */
+STATIC int
+xfs_ino_to_imap(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_ino_t	ino,
+	xfs_imap_t	*imap,
+	uint		imap_flags)
+{
+	int		error;
+
+	error = xfs_imap(mp, tp, ino, imap, imap_flags);
+	if (error) {
+		cmn_err(CE_WARN, "xfs_ino_to_imap: xfs_imap()  returned an "
+				"error %d on %s.  Returning error.",
+				error, mp->m_fsname);
+		return error;
+	}
+
+	/*
+	 * If the inode number maps to a block outside the bounds
+	 * of the file system then return EINVAL rather than calling
+	 * read_buf and panicking when we get an error from the
+	 * driver.
+	 */
+	if ((imap->im_blkno + imap->im_len) >
+	    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
+		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_ino_to_imap: "
+			"(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
+			" XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
+			(unsigned long long) imap->im_blkno,
+			(unsigned long long) imap->im_len,
+			XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
+		return XFS_ERROR(EINVAL);
+	}
+	return 0;
+}
+
+/*
+ * Read the buffer described by the given inode map, passing the caller's
+ * buf_flags (e.g. XFS_BUF_LOCK or XFS_BUF_TRYLOCK) to xfs_trans_read_buf(),
+ * then do basic validation of the inodes in it.  Returns 0 and sets *bpp.
+ */
+STATIC int
+xfs_imap_to_bp(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_imap_t	*imap,
+	xfs_buf_t	**bpp,
+	uint		buf_flags,
+	uint		imap_flags)
+{
+	int		error;
+	int		i;
+	int		ni;
+	xfs_buf_t	*bp;
+
+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
+				   (int)imap->im_len, buf_flags, &bp);
+	if (error) {
+		cmn_err(CE_WARN, "xfs_imap_to_bp: xfs_trans_read_buf()returned "
+				"an error %d on %s.  Returning error.",
+				error, mp->m_fsname);
+		return error;
+	}
+
+	/*
+	 * Validate the magic number and version of every inode in the buffer
+	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
+	 */
+#ifdef DEBUG
+	ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
+#else	/* usual case */
+	ni = 1;
+#endif
+
+	for (i = 0; i < ni; i++) {
+		int		di_ok;
+		xfs_dinode_t	*dip;
+
+		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+					(i << mp->m_sb.sb_inodelog));
+		di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
+			    XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
+		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
+						XFS_ERRTAG_ITOBP_INOTOBP,
+						XFS_RANDOM_ITOBP_INOTOBP))) {
+			if (imap_flags & XFS_IMAP_BULKSTAT) {
+				xfs_trans_brelse(tp, bp);
+				return XFS_ERROR(EINVAL);
+			}
+			XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
+						XFS_ERRLEVEL_HIGH, mp, dip);
+#ifdef DEBUG
+			cmn_err(CE_PANIC,
+					"Device %s - bad inode magic/vsn "
+					"daddr %lld #%d (magic=%x)",
+				XFS_BUFTARG_NAME(mp->m_ddev_targp),
+				(unsigned long long)imap->im_blkno, i,
+				be16_to_cpu(dip->di_core.di_magic));
+#endif
+			xfs_trans_brelse(tp, bp);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+	}
+
+	xfs_inobp_check(mp, bp);
+
+	/*
+	 * Mark the buffer as an inode buffer now that it looks good
+	 */
+	XFS_BUF_SET_VTYPE(bp, B_FS_INO);
+
+	*bpp = bp;
+	return 0;
+}
+
+/*
  * This routine is called to map an inode number within a file
  * system to the buffer containing the on-disk version of the
  * inode.  It returns a pointer to the buffer containing the
@@ -145,72 +265,19 @@ xfs_inotobp(
 	xfs_buf_t	**bpp,
 	int		*offset)
 {
-	int		di_ok;
 	xfs_imap_t	imap;
 	xfs_buf_t	*bp;
 	int		error;
-	xfs_dinode_t	*dip;
 
-	/*
-	 * Call the space management code to find the location of the
-	 * inode on disk.
-	 */
 	imap.im_blkno = 0;
-	error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP);
-	if (error != 0) {
-		cmn_err(CE_WARN,
-	"xfs_inotobp: xfs_imap()  returned an "
-	"error %d on %s.  Returning error.", error, mp->m_fsname);
+	error = xfs_ino_to_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP);
+	if (error)
 		return error;
-	}
-
-	/*
-	 * If the inode number maps to a block outside the bounds of the
-	 * file system then return NULL rather than calling read_buf
-	 * and panicing when we get an error from the driver.
-	 */
-	if ((imap.im_blkno + imap.im_len) >
-	    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-		cmn_err(CE_WARN,
-	"xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds "
-	"of the file system %s.  Returning EINVAL.",
-			(unsigned long long)imap.im_blkno,
-			imap.im_len, mp->m_fsname);
-		return XFS_ERROR(EINVAL);
-	}
-
-	/*
-	 * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
-	 * default to just a read_buf() call.
-	 */
-	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
-				   (int)imap.im_len, XFS_BUF_LOCK, &bp);
 
-	if (error) {
-		cmn_err(CE_WARN,
-	"xfs_inotobp: xfs_trans_read_buf()  returned an "
-	"error %d on %s.  Returning error.", error, mp->m_fsname);
+	error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0);
+	if (error)
 		return error;
-	}
-	dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0);
-	di_ok =
-		be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
-		XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
-	if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
-			XFS_RANDOM_ITOBP_INOTOBP))) {
-		XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip);
-		xfs_trans_brelse(tp, bp);
-		cmn_err(CE_WARN,
-	"xfs_inotobp: XFS_TEST_ERROR()  returned an "
-	"error on %s.  Returning EFSCORRUPTED.",  mp->m_fsname);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-
-	xfs_inobp_check(mp, bp);
 
-	/*
-	 * Set *dipp to point to the on-disk inode in the buffer.
-	 */
 	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
 	*bpp = bp;
 	*offset = imap.im_boffset;
@@ -251,41 +318,15 @@ xfs_itobp(
 	xfs_imap_t	imap;
 	xfs_buf_t	*bp;
 	int		error;
-	int		i;
-	int		ni;
 
 	if (ip->i_blkno == (xfs_daddr_t)0) {
-		/*
-		 * Call the space management code to find the location of the
-		 * inode on disk.
-		 */
 		imap.im_blkno = bno;
-		if ((error = xfs_imap(mp, tp, ip->i_ino, &imap,
-					XFS_IMAP_LOOKUP | imap_flags)))
+		error = xfs_ino_to_imap(mp, tp, ip->i_ino, &imap,
+					XFS_IMAP_LOOKUP | imap_flags);
+		if (error)
 			return error;
 
 		/*
-		 * If the inode number maps to a block outside the bounds
-		 * of the file system then return NULL rather than calling
-		 * read_buf and panicing when we get an error from the
-		 * driver.
-		 */
-		if ((imap.im_blkno + imap.im_len) >
-		    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-#ifdef DEBUG
-			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
-					"(imap.im_blkno (0x%llx) "
-					"+ imap.im_len (0x%llx)) > "
-					" XFS_FSB_TO_BB(mp, "
-					"mp->m_sb.sb_dblocks) (0x%llx)",
-					(unsigned long long) imap.im_blkno,
-					(unsigned long long) imap.im_len,
-					XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
-#endif /* DEBUG */
-			return XFS_ERROR(EINVAL);
-		}
-
-		/*
 		 * Fill in the fields in the inode that will be used to
 		 * map the inode to its buffer from now on.
 		 */
@@ -303,76 +344,10 @@ xfs_itobp(
 	}
 	ASSERT(bno == 0 || bno == imap.im_blkno);
 
-	/*
-	 * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
-	 * default to just a read_buf() call.
-	 */
-	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
-				   (int)imap.im_len, XFS_BUF_LOCK, &bp);
-	if (error) {
-#ifdef DEBUG
-		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
-				"xfs_trans_read_buf() returned error %d, "
-				"imap.im_blkno 0x%llx, imap.im_len 0x%llx",
-				error, (unsigned long long) imap.im_blkno,
-				(unsigned long long) imap.im_len);
-#endif /* DEBUG */
+	error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags);
+	if (error)
 		return error;
-	}
-
-	/*
-	 * Validate the magic number and version of every inode in the buffer
-	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
-	 * No validation is done here in userspace (xfs_repair).
-	 */
-#if !defined(__KERNEL__)
-	ni = 0;
-#elif defined(DEBUG)
-	ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
-#else	/* usual case */
-	ni = 1;
-#endif
-
-	for (i = 0; i < ni; i++) {
-		int		di_ok;
-		xfs_dinode_t	*dip;
-
-		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
-					(i << mp->m_sb.sb_inodelog));
-		di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
-			    XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
-		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
-						XFS_ERRTAG_ITOBP_INOTOBP,
-						XFS_RANDOM_ITOBP_INOTOBP))) {
-			if (imap_flags & XFS_IMAP_BULKSTAT) {
-				xfs_trans_brelse(tp, bp);
-				return XFS_ERROR(EINVAL);
-			}
-#ifdef DEBUG
-			cmn_err(CE_ALERT,
-					"Device %s - bad inode magic/vsn "
-					"daddr %lld #%d (magic=%x)",
-				XFS_BUFTARG_NAME(mp->m_ddev_targp),
-				(unsigned long long)imap.im_blkno, i,
-				be16_to_cpu(dip->di_core.di_magic));
-#endif
-			XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH,
-					     mp, dip);
-			xfs_trans_brelse(tp, bp);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
-	}
-
-	xfs_inobp_check(mp, bp);
 
-	/*
-	 * Mark the buffer as an inode buffer now that it looks good
-	 */
-	XFS_BUF_SET_VTYPE(bp, B_FS_INO);
-
-	/*
-	 * Set *dipp to point to the on-disk inode in the buffer.
-	 */
 	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
 	*bpp = bp;
 	return 0;

[-- Attachment #3: xfs-iflush-blocking-fix --]
[-- Type: text/plain, Size: 6403 bytes --]

---
 fs/xfs/linux-2.6/xfs_super.c |    3 +-
 fs/xfs/linux-2.6/xfs_vnode.h |    5 ---
 fs/xfs/xfs_inode.c           |   33 ++++++++++++++++---------
 fs/xfs/xfs_inode.h           |    7 +++--
 fs/xfs/xfs_vnodeops.c        |   55 +++++++++----------------------------------
 5 files changed, 41 insertions(+), 62 deletions(-)

Index: 2.6.x-xfs-new/fs/xfs/xfs_inode.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_inode.c	2007-11-05 10:17:36.000000000 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfs_inode.c	2007-11-05 10:33:49.590268027 +1100
@@ -306,14 +306,15 @@ xfs_inotobp(
  * 0 for the disk block address.
  */
 int
-xfs_itobp(
+xfs_itobp_flags(
 	xfs_mount_t	*mp,
 	xfs_trans_t	*tp,
 	xfs_inode_t	*ip,
 	xfs_dinode_t	**dipp,
 	xfs_buf_t	**bpp,
 	xfs_daddr_t	bno,
-	uint		imap_flags)
+	uint		imap_flags,
+	uint		buf_flags)
 {
 	xfs_imap_t	imap;
 	xfs_buf_t	*bp;
@@ -344,10 +345,17 @@ xfs_itobp(
 	}
 	ASSERT(bno == 0 || bno == imap.im_blkno);
 
-	error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags);
+	error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags);
 	if (error)
 		return error;
 
+	if (!bp) {
+		ASSERT(buf_flags & XFS_BUF_TRYLOCK);
+		ASSERT(tp == NULL);
+		*bpp = NULL;
+		return EAGAIN;
+	}
+
 	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
 	*bpp = bp;
 	return 0;
@@ -3068,15 +3076,6 @@ xfs_iflush(
 	}
 
 	/*
-	 * Get the buffer containing the on-disk inode.
-	 */
-	error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0);
-	if (error) {
-		xfs_ifunlock(ip);
-		return error;
-	}
-
-	/*
 	 * Decide how buffer will be flushed out.  This is done before
 	 * the call to xfs_iflush_int because this field is zeroed by it.
 	 */
@@ -3125,6 +3124,16 @@ xfs_iflush(
 	}
 
 	/*
+	 * Get the buffer containing the on-disk inode.
+	 */
+	error = xfs_itobp_flags(mp, NULL, ip, &dip, &bp, 0, 0,
+			(flags == INT_ASYNC) ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK);
+	if (error ||!bp) {
+		xfs_ifunlock(ip);
+		return error;
+	}
+
+	/*
 	 * First flush out the inode that xfs_iflush was called with.
 	 */
 	error = xfs_iflush_int(ip, bp);
Index: 2.6.x-xfs-new/fs/xfs/xfs_inode.h
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_inode.h	2007-11-02 13:44:46.000000000 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfs_inode.h	2007-11-05 10:25:44.885153248 +1100
@@ -488,9 +488,12 @@ int		xfs_finish_reclaim_all(struct xfs_m
 /*
  * xfs_inode.c prototypes.
  */
-int		xfs_itobp(struct xfs_mount *, struct xfs_trans *,
+int		xfs_itobp_flags(struct xfs_mount *, struct xfs_trans *,
 			  xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **,
-			  xfs_daddr_t, uint);
+			  xfs_daddr_t, uint, uint);
+#define xfs_itobp(mp, tp, ip, dipp, bpp, bno, iflags)	\
+	xfs_itobp_flags(mp, tp, ip, dipp, bpp, bno, iflags, XFS_BUF_LOCK)
+
 int		xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
 			  xfs_inode_t **, xfs_daddr_t, uint);
 int		xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
Index: 2.6.x-xfs-new/fs/xfs/linux-2.6/xfs_super.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/linux-2.6/xfs_super.c	2007-11-02 13:44:50.000000000 +1100
+++ 2.6.x-xfs-new/fs/xfs/linux-2.6/xfs_super.c	2007-11-05 10:39:05.969204451 +1100
@@ -840,7 +840,8 @@ xfs_fs_write_inode(
 	struct inode		*inode,
 	int			sync)
 {
-	int			error = 0, flags = FLUSH_INODE;
+	int			error = 0;
+	int			flags = 0;
 
 	xfs_itrace_entry(XFS_I(inode));
 	if (sync) {
Index: 2.6.x-xfs-new/fs/xfs/linux-2.6/xfs_vnode.h
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/linux-2.6/xfs_vnode.h	2007-10-02 16:01:47.000000000 +1000
+++ 2.6.x-xfs-new/fs/xfs/linux-2.6/xfs_vnode.h	2007-11-05 10:40:49.103817818 +1100
@@ -73,12 +73,9 @@ typedef enum bhv_vrwlock {
 #define IO_INVIS	0x00020		/* don't update inode timestamps */
 
 /*
- * Flags for vop_iflush call
+ * Flags for xfs_inode_flush
  */
 #define FLUSH_SYNC		1	/* wait for flush to complete	*/
-#define FLUSH_INODE		2	/* flush the inode itself	*/
-#define FLUSH_LOG		4	/* force the last log entry for
-					 * this inode out to disk	*/
 
 /*
  * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
Index: 2.6.x-xfs-new/fs/xfs/xfs_vnodeops.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_vnodeops.c	2007-11-05 10:02:05.000000000 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfs_vnodeops.c	2007-11-05 10:37:53.398623943 +1100
@@ -3556,29 +3556,6 @@ xfs_inode_flush(
 	    ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)))
 		return 0;
 
-	if (flags & FLUSH_LOG) {
-		if (iip && iip->ili_last_lsn) {
-			xlog_t		*log = mp->m_log;
-			xfs_lsn_t	sync_lsn;
-			int		s, log_flags = XFS_LOG_FORCE;
-
-			s = GRANT_LOCK(log);
-			sync_lsn = log->l_last_sync_lsn;
-			GRANT_UNLOCK(log, s);
-
-			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
-				if (flags & FLUSH_SYNC)
-					log_flags |= XFS_LOG_SYNC;
-				error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
-				if (error)
-					return error;
-			}
-
-			if (ip->i_update_core == 0)
-				return 0;
-		}
-	}
-
 	/*
 	 * We make this non-blocking if the inode is contended,
 	 * return EAGAIN to indicate to the caller that they
@@ -3586,30 +3563,22 @@ xfs_inode_flush(
 	 * blocking on inodes inside another operation right
 	 * now, they get caught later by xfs_sync.
 	 */
-	if (flags & FLUSH_INODE) {
-		int	flush_flags;
-
-		if (flags & FLUSH_SYNC) {
-			xfs_ilock(ip, XFS_ILOCK_SHARED);
-			xfs_iflock(ip);
-		} else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-			if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
-				xfs_iunlock(ip, XFS_ILOCK_SHARED);
-				return EAGAIN;
-			}
-		} else {
+	if (flags & FLUSH_SYNC) {
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+		xfs_iflock(ip);
+	} else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+		if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
+			xfs_iunlock(ip, XFS_ILOCK_SHARED);
 			return EAGAIN;
 		}
-
-		if (flags & FLUSH_SYNC)
-			flush_flags = XFS_IFLUSH_SYNC;
-		else
-			flush_flags = XFS_IFLUSH_ASYNC;
-
-		error = xfs_iflush(ip, flush_flags);
-		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	} else {
+		return EAGAIN;
 	}
 
+	error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC
+						    : XFS_IFLUSH_ASYNC);
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
 	return error;
 }
 

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-05  1:45                                               ` David Chinner
@ 2007-11-05  7:01                                                 ` Torsten Kaiser
  2007-11-05 18:27                                                 ` Torsten Kaiser
  1 sibling, 0 replies; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-05  7:01 UTC (permalink / raw)
  To: David Chinner
  Cc: Peter Zijlstra, Fengguang Wu, Maxim Levitsky, linux-kernel,
	Andrew Morton, linux-fsdevel, xfs

On 11/5/07, David Chinner <dgc@sgi.com> wrote:
> On Sun, Nov 04, 2007 at 12:19:19PM +0100, Torsten Kaiser wrote:
> > I can now confirm, that I see this also with the current mainline-git-version
> > I used 2.6.24-rc1-git-b4f555081fdd27d13e6ff39d455d5aefae9d2c0c
> > plus the fix for the sg changes in ieee1394.
>
> Ok, so it's probably a side effect of the writeback changes.
>
> Attached are two patches (two because one was in a separate patchset as
> a standalone change) that should prevent async writeback from blocking
> on locked inode cluster buffers. Apply the xfs-factor-inotobp patch first.
> Can you see if this fixes the problem?

Applied both patches against the kernel mentioned above.
This blows up at boot:
[   80.807589] Filesystem "dm-0": Disabling barriers, not supported by
the underlying device
[   80.820241] XFS mounting filesystem dm-0
[   80.913144] ------------[ cut here ]------------
[   80.914932] kernel BUG at drivers/md/raid5.c:143!
[   80.916751] invalid opcode: 0000 [1] SMP
[   80.918338] CPU 3
[   80.919142] Modules linked in:
[   80.920345] Pid: 974, comm: md1_raid5 Not tainted 2.6.24-rc1 #3
[   80.922628] RIP: 0010:[<ffffffff804b6ee4>]  [<ffffffff804b6ee4>]
__release_stripe+0x164/0x170
[   80.925935] RSP: 0018:ffff8100060e7dd0  EFLAGS: 00010002
[   80.927987] RAX: 0000000000000000 RBX: ffff81010141c288 RCX: 0000000000000000
[   80.930738] RDX: 0000000000000000 RSI: ffff81010141c288 RDI: ffff810004fb3200
[   80.933488] RBP: ffff810004fb3200 R08: 0000000000000000 R09: 0000000000000005
[   80.936240] R10: 0000000000000e00 R11: ffffe200038465e8 R12: ffff81010141c298
[   80.938990] R13: 0000000000000286 R14: ffff810004fb3330 R15: 0000000000000000
[   80.941741] FS:  000000000060c870(0000) GS:ffff810100313700(0000)
knlGS:0000000000000000
[   80.944861] CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
[   80.947080] CR2: 00007fff7b295000 CR3: 0000000101842000 CR4: 00000000000006e0
[   80.949830] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   80.952580] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[   80.955332] Process md1_raid5 (pid: 974, threadinfo
ffff8100060e6000, task ffff81000645c730)
[   80.958584] Stack:  ffff81010141c288 00000000000001f4
ffff810004fb3200 ffffffff804b6f2d
[   80.961761]  00000000000001f4 ffff81010141c288 ffffffff804c8bd0
0000000000000000
[   80.964681]  ffff8100060e7ee8 ffffffff804bd094 ffff81000645c730
ffff8100060e7e70
[   80.967518] Call Trace:
[   80.968558]  [<ffffffff804b6f2d>] release_stripe+0x3d/0x60
[   80.970677]  [<ffffffff804c8bd0>] md_thread+0x0/0x100
[   80.972629]  [<ffffffff804bd094>] raid5d+0x344/0x450
[   80.974549]  [<ffffffff8023df10>] process_timeout+0x0/0x10
[   80.976668]  [<ffffffff805ae1ca>] schedule_timeout+0x5a/0xd0
[   80.978855]  [<ffffffff804c8bd0>] md_thread+0x0/0x100
[   80.980807]  [<ffffffff804c8c00>] md_thread+0x30/0x100
[   80.982794]  [<ffffffff80249f20>] autoremove_wake_function+0x0/0x30
[   80.985214]  [<ffffffff804c8bd0>] md_thread+0x0/0x100
[   80.987167]  [<ffffffff80249b3b>] kthread+0x4b/0x80
[   80.989054]  [<ffffffff8020c9c8>] child_rip+0xa/0x12
[   80.990972]  [<ffffffff80249af0>] kthread+0x0/0x80
[   80.992824]  [<ffffffff8020c9be>] child_rip+0x0/0x12
[   80.994743]
[   80.995588]
[   80.995588] Code: 0f 0b eb fe 0f 1f 84 00 00 00 00 00 48 83 ec 28
48 89 5c 24
[   80.999307] RIP  [<ffffffff804b6ee4>] __release_stripe+0x164/0x170
[   81.001711]  RSP <ffff8100060e7dd0>

Switching back to unpatched 2.6.23-mm1 boots successfully...

Torsten

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-05  1:45                                               ` David Chinner
  2007-11-05  7:01                                                 ` Torsten Kaiser
@ 2007-11-05 18:27                                                 ` Torsten Kaiser
  2007-11-06  4:25                                                   ` David Chinner
  1 sibling, 1 reply; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-05 18:27 UTC (permalink / raw)
  To: David Chinner
  Cc: Peter Zijlstra, Fengguang Wu, Maxim Levitsky, linux-kernel,
	Andrew Morton, linux-fsdevel, xfs

On 11/5/07, David Chinner <dgc@sgi.com> wrote:
> Ok, so it's probably a side effect of the writeback changes.
>
> Attached are two patches (two because one was in a separate patchset as
> a standalone change) that should prevent async writeback from blocking
> on locked inode cluster buffers. Apply the xfs-factor-inotobp patch first.
> Can you see if this fixes the problem?

Now testing v2.6.24-rc1-650-gb55d1b1+ the fix for the misapplied raid5-patch
Applying your two patches on top of that does not fix the stalls.

vmstat 10 output from unmerging (uninstalling) a kernel:
 1  0      0 3512188    332 192644    0    0   185    12  368  735 10  3 85  1
-> emerge starts to remove the kernel source files
 3  0      0 3506624    332 192836    0    0    15  9825 2458 8307  7 12 81  0
 0  0      0 3507212    332 192836    0    0     0   554  630 1233  0  1 99  0
 0  0      0 3507292    332 192836    0    0     0   537  580 1328  0  1 99  0
 0  0      0 3507168    332 192836    0    0     0   633  626 1380  0  1 99  0
 0  0      0 3507116    332 192836    0    0     0  1510  768 2030  1  2 97  0
 0  0      0 3507596    332 192836    0    0     0   524  540 1544  0  0 99  0
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 0  0      0 3507540    332 192836    0    0     0   489  551 1293  0  0 99  0
 0  0      0 3507528    332 192836    0    0     0   527  510 1432  1  1 99  0
 0  0      0 3508052    332 192840    0    0     0  2088  910 2964  2  3 95  0
 0  0      0 3507888    332 192840    0    0     0   442  565 1383  1  1 99  0
 0  0      0 3508704    332 192840    0    0     0   497  529 1479  0  0 99  0
 0  0      0 3508704    332 192840    0    0     0   594  595 1458  0  0 99  0
 0  0      0 3511492    332 192840    0    0     0  2381 1028 2941  2  3 95  0
 0  0      0 3510684    332 192840    0    0     0   699  600 1390  0  0 99  0
 0  0      0 3511636    332 192840    0    0     0   741  661 1641  0  0 100  0
 0  0      0 3524020    332 192840    0    0     0  2452 1080 3910  2  3 95  0
 0  0      0 3524040    332 192844    0    0     0   530  617 1297  0  0 99  0
 0  0      0 3524128    332 192844    0    0     0   812  674 1667  0  1 99  0
 0  0      0 3527000    332 193672    0    0   339   721  754 1681  3  2 93  1
-> emerge is finished, no dirty or writeback data in /proc/meminfo
 0  0      0 3571056    332 194768    0    0   111   639  632 1344  0  1 99  0
 0  0      0 3571260    332 194768    0    0     0   757  688 1405  1  0 99  0
 0  0      0 3571156    332 194768    0    0     0   753  641 1361  0  0 99  0
 0  0      0 3571404    332 194768    0    0     0   766  653 1389  0  0 99  0
 1  0      0 3571136    332 194768    0    0     6   764  669 1488  0  0 99  0
 0  0      0 3571668    332 194824    0    0     0   764  657 1482  0  0 99  0
 0  0      0 3571848    332 194824    0    0     0   673  659 1406  0  0 99  0
 0  0      0 3571908    332 195052    0    0    22   753  638 1500  0  1 99  0
 0  0      0 3573052    332 195052    0    0     0   765  631 1482  0  1 99  0
 0  0      0 3574144    332 195052    0    0     0   771  640 1497  0  0 99  0
 0  0      0 3573468    332 195052    0    0     0   458  485 1251  0  0 99  0
 0  0      0 3574184    332 195052    0    0     0   427  474 1192  0  0 100  0
 0  0      0 3575092    332 195052    0    0     0   461  482 1235  0  0 99  0
 0  0      0 3576368    332 195056    0    0     0   582  556 1310  0  0 99  0
 0  0      0 3579300    332 195056    0    0     0   695  571 1402  0  0 99  0
 0  0      0 3580376    332 195056    0    0     0   417  568  906  0  0 99  0
 0  0      0 3581212    332 195056    0    0     0   421  559  977  0  1 99  0
 0  0      0 3583780    332 195060    0    0     0   494  555 1080  0  1 99  0
 0  0      0 3584352    332 195060    0    0     0    99  347  559  0  0 99  0
 0  0      0 3585232    332 195060    0    0     0    11  301  621  0  0 99  0
-> disks go idle.

So these patches do not seem to be the source of these excessive disk writes...

Torsten

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
       [not found]                                         ` <393999615.15343@ustc.edu.cn>
  2007-11-02 10:33                                           ` Fengguang Wu
@ 2007-11-05 23:57                                           ` Andrew Morton
  2007-11-06 10:20                                             ` Peter Zijlstra
  2007-11-06 16:25                                             ` Patch tags [was writeout stalls in current -git] Jonathan Corbet
  1 sibling, 2 replies; 61+ messages in thread
From: Andrew Morton @ 2007-11-05 23:57 UTC (permalink / raw)
  To: Fengguang Wu
  Cc: peterz, just.for.lkml, maximlevitsky, linux-kernel, dgc, linux-fsdevel

On Fri, 2 Nov 2007 18:33:29 +0800
Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:

> On Fri, Nov 02, 2007 at 11:15:32AM +0100, Peter Zijlstra wrote:
> > On Fri, 2007-11-02 at 10:21 +0800, Fengguang Wu wrote:
> > 
> > > Interestingly, no background_writeout() appears, but only
> > > balance_dirty_pages() and wb_kupdate.  Obviously wb_kupdate won't
> > > block the process.
> > 
> > Yeah, the background threshold is not (yet) scaled. So it can happen
> > that the bdi_dirty limit is below the background limit.
> > 
> > I'm curious though as to these stalls, though, I can't seem to think of
> > what goes wrong.. esp since most writeback seems to happen from pdflush.
> 
> Me confused too. The new debug patch will confirm whether emerge is
> waiting in balance_dirty_pages().
> 
> > (or I'm totally misreading it - quite a possible as I'm still recovering
> > from a serious cold and not all the green stuff has yet figured out its
> > proper place wrt brain cells 'n stuff)
> 
> Do take care of yourself.
> 
> > 
> > I still have this patch floating around:
> 
> I think this patch is OK for 2.6.24 :-)
> 
> Reviewed-by: Fengguang Wu <wfg@mail.ustc.edu.cn> 

I would prefer Tested-by: :(

> > 
> > ---
> > Subject: mm: speed up writeback ramp-up on clean systems
> > 
> > We allow violation of bdi limits if there is a lot of room on the
> > system. Once we hit half the total limit we start enforcing bdi limits
> > and bdi ramp-up should happen. Doing it this way avoids many small
> > writeouts on an otherwise idle system and should also speed up the
> > ramp-up.

Given the problems we're having in there I'm a bit reluctant to go tossing
hastily put together and inadequately tested stuff onto the fire.  And
that's what this patch looks like to me.

Wanna convince me otherwise?

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-05 18:27                                                 ` Torsten Kaiser
@ 2007-11-06  4:25                                                   ` David Chinner
  2007-11-06  7:10                                                     ` Torsten Kaiser
  2007-11-06 19:01                                                     ` Peter Zijlstra
  0 siblings, 2 replies; 61+ messages in thread
From: David Chinner @ 2007-11-06  4:25 UTC (permalink / raw)
  To: Torsten Kaiser
  Cc: David Chinner, Peter Zijlstra, Fengguang Wu, Maxim Levitsky,
	linux-kernel, Andrew Morton, linux-fsdevel, xfs

On Mon, Nov 05, 2007 at 07:27:16PM +0100, Torsten Kaiser wrote:
> On 11/5/07, David Chinner <dgc@sgi.com> wrote:
> > Ok, so it's probably a side effect of the writeback changes.
> >
> > Attached are two patches (two because one was in a separate patchset as
> > a standalone change) that should prevent async writeback from blocking
> > on locked inode cluster buffers. Apply the xfs-factor-inotobp patch first.
> > Can you see if this fixes the problem?
> 
> Now testing v2.6.24-rc1-650-gb55d1b1+ the fix for the missapplied raid5-patch
> Applying your two patches ontop of that does not fix the stalls.

So you are having RAID5 problems as well?

I'm struggling to understand what could possibly have changed in XFS or writeback that
would lead to stalls like this, esp. as you appear to be removing files when
the stalls occur. Rather than vmstat, can you use something like iostat to
show how busy your disks are?  i.e. are we seeing RMW cycles in the raid5 or
some such issue.

OOC, what is the 'xfs_info <mtpt>' output for your filesystem? 

> vmstat 10 output from unmerging (uninstalling) a kernel:
>  1  0      0 3512188    332 192644    0    0   185    12  368  735 10  3 85  1
> -> emerge starts to remove the kernel source files
>  3  0      0 3506624    332 192836    0    0    15  9825 2458 8307  7 12 81  0
>  0  0      0 3507212    332 192836    0    0     0   554  630 1233  0  1 99  0
>  0  0      0 3507292    332 192836    0    0     0   537  580 1328  0  1 99  0
>  0  0      0 3507168    332 192836    0    0     0   633  626 1380  0  1 99  0
>  0  0      0 3507116    332 192836    0    0     0  1510  768 2030  1  2 97  0
>  0  0      0 3507596    332 192836    0    0     0   524  540 1544  0  0 99  0
> procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
>  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
>  0  0      0 3507540    332 192836    0    0     0   489  551 1293  0  0 99  0
>  0  0      0 3507528    332 192836    0    0     0   527  510 1432  1  1 99  0
>  0  0      0 3508052    332 192840    0    0     0  2088  910 2964  2  3 95  0
>  0  0      0 3507888    332 192840    0    0     0   442  565 1383  1  1 99  0
>  0  0      0 3508704    332 192840    0    0     0   497  529 1479  0  0 99  0
>  0  0      0 3508704    332 192840    0    0     0   594  595 1458  0  0 99  0
>  0  0      0 3511492    332 192840    0    0     0  2381 1028 2941  2  3 95  0
>  0  0      0 3510684    332 192840    0    0     0   699  600 1390  0  0 99  0
>  0  0      0 3511636    332 192840    0    0     0   741  661 1641  0  0 100  0
>  0  0      0 3524020    332 192840    0    0     0  2452 1080 3910  2  3 95  0
>  0  0      0 3524040    332 192844    0    0     0   530  617 1297  0  0 99  0
>  0  0      0 3524128    332 192844    0    0     0   812  674 1667  0  1 99  0
>  0  0      0 3527000    332 193672    0    0   339   721  754 1681  3  2 93  1
> -> emerge is finished, no dirty or writeback data in /proc/meminfo

At this point, can you run a "sync" and see how long that takes to
complete? The only thing I can think of that would be written out after
this point is inodes, but even then it seems to go on for a long,
long time and it really doesn't seem like XFS is holding up the
inode writes.

Another option is to use blktrace/blkparse to determine which process is
issuing this I/O.

>  0  0      0 3583780    332 195060    0    0     0   494  555 1080  0  1 99  0
>  0  0      0 3584352    332 195060    0    0     0    99  347  559  0  0 99  0
>  0  0      0 3585232    332 195060    0    0     0    11  301  621  0  0 99  0
> -> disks go idle.
> 
> So these patches do not seem to be the source of these excessive disk writes...

Well, the patches I posted should prevent blocking in the places that it
was seen, so if that does not stop the slowdowns then either the writeback
code is not feeding us inodes fast enough or the block device below is
having some kind of problem....

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-06  4:25                                                   ` David Chinner
@ 2007-11-06  7:10                                                     ` Torsten Kaiser
  2007-11-06 19:01                                                     ` Peter Zijlstra
  1 sibling, 0 replies; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-06  7:10 UTC (permalink / raw)
  To: David Chinner
  Cc: Peter Zijlstra, Fengguang Wu, Maxim Levitsky, linux-kernel,
	Andrew Morton, linux-fsdevel, xfs

On 11/6/07, David Chinner <dgc@sgi.com> wrote:
> On Mon, Nov 05, 2007 at 07:27:16PM +0100, Torsten Kaiser wrote:
> > On 11/5/07, David Chinner <dgc@sgi.com> wrote:
> > > Ok, so it's probably a side effect of the writeback changes.
> > >
> > > Attached are two patches (two because one was in a separate patchset as
> > > a standalone change) that should prevent async writeback from blocking
> > > on locked inode cluster buffers. Apply the xfs-factor-inotobp patch first.
> > > Can you see if this fixes the problem?
> >
> > Now testing v2.6.24-rc1-650-gb55d1b1+ the fix for the missapplied raid5-patch
> > Applying your two patches ontop of that does not fix the stalls.
>
> So you are having RAID5 problems as well?

The first 2.6.24-rc1-git-kernel that I patched with your patches did
not boot for me. (Oops sent in one of my previous mails) But given
that the stacktrace was not xfs related and I had seen this patch on
the lkml, I tried to fix this Oops this way.
I did not have troubles with the RAID5 otherwise.

> I'm struggling to understand what possible changed in XFS or writeback that
> would lead to stalls like this, esp. as you appear to be removing files when
> the stalls occur. Rather than vmstat, can you use something like iostat to
> show how busy your disks are?  i.e. are we seeing RMW cycles in the raid5 or
> some such issue.

Will do this this evening.

> OOC, what is the 'xfs_info <mtpt>' output for your filesystem?

meta-data=/dev/mapper/root       isize=256    agcount=32, agsize=4731132 blks
         =                       sectsz=512   attr=1
data     =                       bsize=4096   blocks=151396224, imaxpct=25
         =                       sunit=0      swidth=0 blks, unwritten=1
naming   =version 2              bsize=4096
log      =internal               bsize=4096   blocks=32768, version=1
         =                       sectsz=512   sunit=0 blks, lazy-count=0
realtime =none                   extsz=4096   blocks=0, rtextents=0


> > vmstat 10 output from unmerging (uninstalling) a kernel:
> >  1  0      0 3512188    332 192644    0    0   185    12  368  735 10  3 85  1
> > -> emerge starts to remove the kernel source files
> >  3  0      0 3506624    332 192836    0    0    15  9825 2458 8307  7 12 81  0
> >  0  0      0 3507212    332 192836    0    0     0   554  630 1233  0  1 99  0
> >  0  0      0 3507292    332 192836    0    0     0   537  580 1328  0  1 99  0
> >  0  0      0 3507168    332 192836    0    0     0   633  626 1380  0  1 99  0
> >  0  0      0 3507116    332 192836    0    0     0  1510  768 2030  1  2 97  0
> >  0  0      0 3507596    332 192836    0    0     0   524  540 1544  0  0 99  0
> > procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
> >  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
> >  0  0      0 3507540    332 192836    0    0     0   489  551 1293  0  0 99  0
> >  0  0      0 3507528    332 192836    0    0     0   527  510 1432  1  1 99  0
> >  0  0      0 3508052    332 192840    0    0     0  2088  910 2964  2  3 95  0
> >  0  0      0 3507888    332 192840    0    0     0   442  565 1383  1  1 99  0
> >  0  0      0 3508704    332 192840    0    0     0   497  529 1479  0  0 99  0
> >  0  0      0 3508704    332 192840    0    0     0   594  595 1458  0  0 99  0
> >  0  0      0 3511492    332 192840    0    0     0  2381 1028 2941  2  3 95  0
> >  0  0      0 3510684    332 192840    0    0     0   699  600 1390  0  0 99  0
> >  0  0      0 3511636    332 192840    0    0     0   741  661 1641  0  0 100  0
> >  0  0      0 3524020    332 192840    0    0     0  2452 1080 3910  2  3 95  0
> >  0  0      0 3524040    332 192844    0    0     0   530  617 1297  0  0 99  0
> >  0  0      0 3524128    332 192844    0    0     0   812  674 1667  0  1 99  0
> >  0  0      0 3527000    332 193672    0    0   339   721  754 1681  3  2 93  1
> > -> emerge is finished, no dirty or writeback data in /proc/meminfo
>
> At this point, can you run a "sync" and see how long that takes to
> complete?

Already tried that: http://lkml.org/lkml/2007/11/2/178
See the logs from the second unmerge in the second half of the mail.

The sync did not stop this writeout, but returned immediately.

> The only thing I can think that woul dbe written out after
> this point is inodes, but even then it seems to go on for a long,
> long time and it really doesn't seem like XFS is holding up the
> inode writes.

Yes, I completely agree that this is much too long. That's why I included
the after-emerge-finished parts of the logs. But I still partly
suspect xfs, because the xfssyncd shows up when I hit SysRq+W.

> Another option is to use blktrace/blkparse to determine which process is
> issuing this I/O.
>
> >  0  0      0 3583780    332 195060    0    0     0   494  555 1080  0  1 99  0
> >  0  0      0 3584352    332 195060    0    0     0    99  347  559  0  0 99  0
> >  0  0      0 3585232    332 195060    0    0     0    11  301  621  0  0 99  0
> > -> disks go idle.
> >
> > So these patches do not seem to be the source of these excessive disk writes...
>
> Well, the patches I posted should prevent blocking in the places that it
> was seen, so if that does not stop the slowdowns then either the writeback
> code is not feeding us inodes fast enough or the block device below is
> having some kind of problem....

I don't think it's the block device, because reading/writing larger
files does not seem to be troubled. It looks much more like an inode
problem. For example both installing and uninstalling kernel source
trees show these stalls, but during uninstalling this is much more
noticeable.

But I agree that this might not be xfs specific, as this showed up at
the same time as other people started reporting about the 100% iowait
bug. Could be that this is the same bug and the differences between
reiserfs and xfs might explain the iowait vs. idle. Or that I don't
see the 100% iowait is something else on my system...

Torsten

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
       [not found]                                           ` <394340668.31055@ustc.edu.cn>
@ 2007-11-06  9:17                                             ` Fengguang Wu
  2007-11-06 21:53                                             ` Torsten Kaiser
  1 sibling, 0 replies; 61+ messages in thread
From: Fengguang Wu @ 2007-11-06  9:17 UTC (permalink / raw)
  To: Torsten Kaiser
  Cc: Peter Zijlstra, Maxim Levitsky, linux-kernel, Andrew Morton,
	David Chinner, linux-fsdevel

On Fri, Nov 02, 2007 at 08:22:10PM +0100, Torsten Kaiser wrote:
> [  547.200000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 58858 > global 12829 72 0 wc __ tw 0 sk 0
> [  550.480000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 57834 > global 12017 62 0 wc __ tw 0 sk 0
> [  552.710000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 56810 > global 11133 83 0 wc __ tw 0 sk 0
> [  558.660000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 55786 > global 10470 33 0 wc _M tw 0 sk 0
        4s
> [  562.750000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 54762 > global 10555 69 0 wc _M tw 0 sk 0
        3s
> [  565.150000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 53738 > global 9562 498 0 wc _M tw -2 sk 0
        4s
> [  569.490000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 52712 > global 8960 2 0 wc _M tw 0 sk 0
        3s
> [  572.910000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 51688 > global 8088 205 0 wc _M tw -13 sk 0
        2s
> [  574.610000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 50651 > global 7114 188 0 wc _M tw -1 sk 0
        10s
> [  584.270000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 49626 > global 14544 0 0 wc _M tw -1 sk 0
        9s
> [  593.050000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 48601 > global 24583 736 0 wc _M tw -1 sk 0
        7s
> [  600.180000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47576 > global 27004 6 0 wc _M tw 587 sk 0
> [  600.180000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 47139 > global 27004 6 0 wc __ tw 1014 sk 0

The above messages and the below 'D' state pdflush indicate that one
single writeback_inodes(4MB) call takes a long time(up to 10s!) to complete.

Let's try reverting the below patch with `patch -R`? It looks like
the most relevant change - if it's not a low level bug.

> [note] first stall, the output from emerge stops, so it seems it can
> not start processing the next file until the stall ends
> [  630.000000] SysRq : Emergency Sync
> [  630.120000] Emergency Sync complete
> [  632.850000] SysRq : Show Blocked State
> [  632.850000]   task                        PC stack   pid father
> [  632.850000] pdflush       D ffff81000f091788     0   285      2
> [  632.850000]  ffff810005d4da80 0000000000000046 0000000000000800
> 0000007000000001
> [  632.850000]  ffff81000fd52400 ffffffff8022d61c ffffffff80819b00
> ffffffff80819b00
> [  632.850000]  ffffffff80815f40 ffffffff80819b00 ffff810100316f98
> 0000000000000000
> [  632.850000] Call Trace:
> [  632.850000]  [<ffffffff8022d61c>] task_rq_lock+0x4c/0x90
> [  632.850000]  [<ffffffff8022c8ea>] __wake_up_common+0x5a/0x90
> [  632.850000]  [<ffffffff805b16e7>] __down+0xa7/0x11e
> [  632.850000]  [<ffffffff8022da70>] default_wake_function+0x0/0x10
> [  632.850000]  [<ffffffff805b1365>] __down_failed+0x35/0x3a
> [  632.850000]  [<ffffffff803752ce>] xfs_buf_lock+0x3e/0x40
> [  632.850000]  [<ffffffff8037740e>] _xfs_buf_find+0x13e/0x240
> [  632.850000]  [<ffffffff8037757f>] xfs_buf_get_flags+0x6f/0x190
> [  632.850000]  [<ffffffff803776b2>] xfs_buf_read_flags+0x12/0xa0
> [  632.850000]  [<ffffffff80368824>] xfs_trans_read_buf+0x64/0x340
> [  632.850000]  [<ffffffff80352361>] xfs_itobp+0x81/0x1e0
> [  632.850000]  [<ffffffff8026b293>] write_cache_pages+0x123/0x330
> [  632.850000]  [<ffffffff80354d0e>] xfs_iflush+0xfe/0x520
> [  632.850000]  [<ffffffff803ae5d2>] __down_read_trylock+0x42/0x60
> [  632.850000]  [<ffffffff8036ed49>] xfs_inode_flush+0x179/0x1b0
> [  632.850000]  [<ffffffff8037ca8f>] xfs_fs_write_inode+0x2f/0x90
> [  632.850000]  [<ffffffff802b3aac>] __writeback_single_inode+0x2ac/0x380
> [  632.850000]  [<ffffffff804d074e>] dm_table_any_congested+0x2e/0x80
> [  632.850000]  [<ffffffff802b3f9d>] generic_sync_sb_inodes+0x20d/0x330
> [  632.850000]  [<ffffffff802b4532>] writeback_inodes+0xa2/0xe0
> [  632.850000]  [<ffffffff8026bfd6>] wb_kupdate+0xa6/0x140
> [  632.850000]  [<ffffffff8026c4b0>] pdflush+0x0/0x1e0
> [  632.850000]  [<ffffffff8026c5c0>] pdflush+0x110/0x1e0
> [  632.850000]  [<ffffffff8026bf30>] wb_kupdate+0x0/0x140
> [  632.850000]  [<ffffffff8024a32b>] kthread+0x4b/0x80
> [  632.850000]  [<ffffffff8020c9d8>] child_rip+0xa/0x12
> [  632.850000]  [<ffffffff8024a2e0>] kthread+0x0/0x80
> [  632.850000]  [<ffffffff8020c9ce>] child_rip+0x0/0x12
> [  632.850000]
> [  632.850000] emerge        D 0000000000000000     0  6220   6129
> [  632.850000]  ffff810103ced9f8 0000000000000086 0000000000000000
> 0000007000000001
> [  632.850000]  ffff81000fd52cf8 ffffffff00000000 ffffffff80819b00
> ffffffff80819b00
> [  632.850000]  ffffffff80815f40 ffffffff80819b00 ffff810103ced9b8
> ffff810103ced9a8
> [  632.850000] Call Trace:
> [  632.850000]  [<ffffffff805b16e7>] __down+0xa7/0x11e
> [  632.850000]  [<ffffffff8022da70>] default_wake_function+0x0/0x10
> [  632.850000]  [<ffffffff805b1365>] __down_failed+0x35/0x3a
> [  632.850000]  [<ffffffff803752ce>] xfs_buf_lock+0x3e/0x40
> [  632.850000]  [<ffffffff8037740e>] _xfs_buf_find+0x13e/0x240
> [  632.850000]  [<ffffffff8037757f>] xfs_buf_get_flags+0x6f/0x190
> [  632.850000]  [<ffffffff803776b2>] xfs_buf_read_flags+0x12/0xa0
> [  632.850000]  [<ffffffff80368824>] xfs_trans_read_buf+0x64/0x340
> [  632.850000]  [<ffffffff80352361>] xfs_itobp+0x81/0x1e0
> [  632.850000]  [<ffffffff80375bee>] xfs_buf_rele+0x2e/0xd0
> [  632.850000]  [<ffffffff80354d0e>] xfs_iflush+0xfe/0x520
> [  632.850000]  [<ffffffff803ae5d2>] __down_read_trylock+0x42/0x60
> [  632.850000]  [<ffffffff80355c82>] xfs_inode_item_push+0x12/0x20
> [  632.850000]  [<ffffffff80368247>] xfs_trans_push_ail+0x267/0x2b0
> [  632.850000]  [<ffffffff8035c742>] xfs_log_reserve+0x72/0x120
> [  632.850000]  [<ffffffff80366bf8>] xfs_trans_reserve+0xa8/0x210
> [  632.850000]  [<ffffffff803731f2>] kmem_zone_zalloc+0x32/0x50
> [  632.850000]  [<ffffffff8035263b>] xfs_itruncate_finish+0xfb/0x310
> [  632.850000]  [<ffffffff8036daeb>] xfs_free_eofblocks+0x23b/0x280
> [  632.850000]  [<ffffffff80371f93>] xfs_release+0x153/0x200
> [  632.850000]  [<ffffffff80378010>] xfs_file_release+0x10/0x20
> [  632.850000]  [<ffffffff80294251>] __fput+0xb1/0x220
> [  632.850000]  [<ffffffff802910a4>] filp_close+0x54/0x90
> [  632.850000]  [<ffffffff802929bf>] sys_close+0x9f/0x100
> [  632.850000]  [<ffffffff8020bbbe>] system_call+0x7e/0x83
> [  632.850000]
> [  662.180000] mm/page-writeback.c 676 wb_kupdate: pdflush(285) 73045
> global 39157 0 0 wc __ tw 0 sk 0
> [note] emerge resumed
> [  664.030000] SysRq : HELP : loglevel0-8 reBoot tErm Full kIll saK
> showMem Nice powerOff showPc show-all-timers(Q) unRaw Sync showTasks
> Unmount shoW-blocked-tasks

------------------------------------------------------
Subject: writeback: remove pages_skipped accounting in __block_write_full_page()
From: Fengguang Wu <wfg@mail.ustc.edu.cn>

Miklos Szeredi <miklos@szeredi.hu> and me identified a writeback bug:

> The following strange behavior can be observed:
>
> 1. large file is written
> 2. after 30 seconds, nr_dirty goes down by 1024
> 3. then for some time (< 30 sec) nothing happens (disk idle)
> 4. then nr_dirty again goes down by 1024
> 5. repeat from 3. until whole file is written
>
> So basically a 4Mbyte chunk of the file is written every 30 seconds.
> I'm quite sure this is not the intended behavior.

It can be produced by the following test scheme:

# cat bin/test-writeback.sh
grep nr_dirty /proc/vmstat
echo 1 > /proc/sys/fs/inode_debug
dd if=/dev/zero of=/var/x bs=1K count=204800&
while true; do grep nr_dirty /proc/vmstat; sleep 1; done

# bin/test-writeback.sh
nr_dirty 19207
nr_dirty 19207
nr_dirty 30924
204800+0 records in
204800+0 records out
209715200 bytes (210 MB) copied, 1.58363 seconds, 132 MB/s
nr_dirty 47150
nr_dirty 47141
nr_dirty 47142
nr_dirty 47142
nr_dirty 47142
nr_dirty 47142
nr_dirty 47205
nr_dirty 47214
nr_dirty 47214
nr_dirty 47214
nr_dirty 47214
nr_dirty 47214
nr_dirty 47215
nr_dirty 47216
nr_dirty 47216
nr_dirty 47216
nr_dirty 47154
nr_dirty 47143
nr_dirty 47143
nr_dirty 47143
nr_dirty 47143
nr_dirty 47143
nr_dirty 47142
nr_dirty 47142
nr_dirty 47142
nr_dirty 47142
nr_dirty 47134
nr_dirty 47134
nr_dirty 47135
nr_dirty 47135
nr_dirty 47135
nr_dirty 46097 <== -1038
nr_dirty 46098
nr_dirty 46098
nr_dirty 46098
[...]
nr_dirty 46091
nr_dirty 46092
nr_dirty 46092
nr_dirty 45069 <== -1023
nr_dirty 45056
nr_dirty 45056
nr_dirty 45056
[...]
nr_dirty 37822
nr_dirty 36799 <== -1023
[...]
nr_dirty 36781
nr_dirty 35758 <== -1023
[...]
nr_dirty 34708
nr_dirty 33672 <== -1024
[...]
nr_dirty 33692
nr_dirty 32669 <== -1023

% ls -li /var/x
847824 -rw-r--r-- 1 root root 200M 2007-08-12 04:12 /var/x

% dmesg|grep 847824  # generated by a debug printk
[  529.263184] redirtied inode 847824 line 548
[  564.250872] redirtied inode 847824 line 548
[  594.272797] redirtied inode 847824 line 548
[  629.231330] redirtied inode 847824 line 548
[  659.224674] redirtied inode 847824 line 548
[  689.219890] redirtied inode 847824 line 548
[  724.226655] redirtied inode 847824 line 548
[  759.198568] redirtied inode 847824 line 548

# line 548 in fs/fs-writeback.c:
543                 if (wbc->pages_skipped != pages_skipped) {
544                         /*
545                          * writeback is not making progress due to locked
546                          * buffers.  Skip this inode for now.
547                          */
548                         redirty_tail(inode);
549                 }

More debug efforts show that __block_write_full_page()
never has the chance to call submit_bh() for that big dirty file:
the buffer head is *clean*. So basicly no page io is issued by
__block_write_full_page(), hence pages_skipped goes up.

Also the comment in generic_sync_sb_inodes():

544                         /*
545                          * writeback is not making progress due to locked
546                          * buffers.  Skip this inode for now.
547                          */

and the comment in __block_write_full_page():

1713                 /*
1714                  * The page was marked dirty, but the buffers were
1715                  * clean.  Someone wrote them back by hand with
1716                  * ll_rw_block/submit_bh.  A rare case.
1717                  */

do not quite agree with each other. The page writeback should be skipped for
'locked buffer', but here it is 'clean buffer'!

This patch fixes this bug. Though I'm not sure why __block_write_full_page()
is called only to do nothing and who actually issued the writeback for us.

These are the two possible new behaviors after the patch:

1) pretty nice: wait 30s and write ALL:)
2) not so good:
	- during the dd: ~16M
	- after 30s:      ~4M
	- after 5s:       ~4M
	- after 5s:     ~176M

The next patch will fix case (2).

Cc: David Chinner <dgc@sgi.com>
Cc: Ken Chen <kenchen@google.com>
Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 fs/buffer.c                 |    1 -
 fs/xfs/linux-2.6/xfs_aops.c |    5 ++---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff -puN fs/buffer.c~writeback-remove-pages_skipped-accounting-in-__block_write_full_page fs/buffer.c
--- a/fs/buffer.c~writeback-remove-pages_skipped-accounting-in-__block_write_full_page
+++ a/fs/buffer.c
@@ -1730,7 +1730,6 @@ done:
 		 * The page and buffer_heads can be released at any time from
 		 * here on.
 		 */
-		wbc->pages_skipped++;	/* We didn't write this page */
 	}
 	return err;
 
diff -puN fs/xfs/linux-2.6/xfs_aops.c~writeback-remove-pages_skipped-accounting-in-__block_write_full_page fs/xfs/linux-2.6/xfs_aops.c
--- a/fs/xfs/linux-2.6/xfs_aops.c~writeback-remove-pages_skipped-accounting-in-__block_write_full_page
+++ a/fs/xfs/linux-2.6/xfs_aops.c
@@ -402,10 +402,9 @@ xfs_start_page_writeback(
 		clear_page_dirty_for_io(page);
 	set_page_writeback(page);
 	unlock_page(page);
-	if (!buffers) {
+	/* If no buffers on the page are to be written, finish it here */
+	if (!buffers)
 		end_page_writeback(page);
-		wbc->pages_skipped++;	/* We didn't write this page */
-	}
 }
 
 static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
_

Patches currently in -mm which might be from wfg@mail.ustc.edu.cn are

origin.patch


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-05 23:57                                           ` Andrew Morton
@ 2007-11-06 10:20                                             ` Peter Zijlstra
  2007-11-06 16:25                                             ` Patch tags [was writeout stalls in current -git] Jonathan Corbet
  1 sibling, 0 replies; 61+ messages in thread
From: Peter Zijlstra @ 2007-11-06 10:20 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Fengguang Wu, just.for.lkml, maximlevitsky, linux-kernel, dgc,
	linux-fsdevel

On Mon, 2007-11-05 at 15:57 -0800, Andrew Morton wrote:

> > > Subject: mm: speed up writeback ramp-up on clean systems
> > > 
> > > We allow violation of bdi limits if there is a lot of room on the
> > > system. Once we hit half the total limit we start enforcing bdi limits
> > > and bdi ramp-up should happen. Doing it this way avoids many small
> > > writeouts on an otherwise idle system and should also speed up the
> > > ramp-up.
> 
> Given the problems we're having in there I'm a bit reluctant to go tossing
> hastily put together and inadequately tested stuff onto the fire.  And
> that's what this patch looks like to me.

Not really hastily, I think it was written before the stuff hit
mainline. Inadequately tested, perhaps; it's been in my and probably Wu's
kernels for a while. Granted that's not a lot of testing in the face of
those who have problems atm.

> Wanna convince me otherwise?

I'm perfectly happy with this patch earning its credits in -mm for a
while and maybe going in around -rc4 or something like that (hoping that
by then we've fixed these nagging issues).

Another patch I did come up with yesterday - not driven by any problems
in that area - could perhaps join this one on that path:

---
Subject: mm: bdi: tweak task dirty penalty

Penalizing heavy dirtiers with 1/8-th the total dirty limit might be rather
excessive on large memory machines. Use sqrt to scale it sub-linearly.

Update the comment while we're there.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 mm/page-writeback.c |   12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

Index: linux-2.6-2/mm/page-writeback.c
===================================================================
--- linux-2.6-2.orig/mm/page-writeback.c
+++ linux-2.6-2/mm/page-writeback.c
@@ -213,17 +213,21 @@ static inline void task_dirties_fraction
 }
 
 /*
- * scale the dirty limit
+ * Task specific dirty limit:
  *
- * task specific dirty limit:
+ *   dirty -= 8 * sqrt(dirty) * p_{t}
  *
- *   dirty -= (dirty/8) * p_{t}
+ * Penalize tasks that dirty a lot of pages by lowering their dirty limit. This
+ * avoids infrequent dirtiers from getting stuck in this other guys dirty
+ * pages.
+ *
+ * Use a sub-linear function to scale the penalty, we only need a little room.
  */
 void task_dirty_limit(struct task_struct *tsk, long *pdirty)
 {
 	long numerator, denominator;
 	long dirty = *pdirty;
-	u64 inv = dirty >> 3;
+	u64 inv = 8*int_sqrt(dirty);
 
 	task_dirties_fraction(tsk, &numerator, &denominator);
 	inv *= numerator;



^ permalink raw reply	[flat|nested] 61+ messages in thread

* Patch tags [was writeout stalls in current -git]
  2007-11-05 23:57                                           ` Andrew Morton
  2007-11-06 10:20                                             ` Peter Zijlstra
@ 2007-11-06 16:25                                             ` Jonathan Corbet
  2007-11-06 17:03                                               ` Balbir Singh
  2007-11-06 23:26                                               ` Adrian Bunk
  1 sibling, 2 replies; 61+ messages in thread
From: Jonathan Corbet @ 2007-11-06 16:25 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

Andrew wrote:

> > Reviewed-by: Fengguang Wu <wfg@mail.ustc.edu.cn> 
> 
> I would prefer Tested-by: :(

This seems like as good an opportunity as any to toss my patch tags
document out there one more time.  I still think it's a good idea to
codify some sort of consensus on what these tags mean... 

jon

diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 299615d..1948a93 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -286,6 +286,8 @@ parport.txt
 	- how to use the parallel-port driver.
 parport-lowlevel.txt
 	- description and usage of the low level parallel port functions.
+patch-tags
+	- description of the tags which can be added to patches
 pci-error-recovery.txt
 	- info on PCI error recovery.
 pci.txt
diff --git a/Documentation/patch-tags b/Documentation/patch-tags
new file mode 100644
index 0000000..6acde5e
--- /dev/null
+++ b/Documentation/patch-tags
@@ -0,0 +1,76 @@
+Patches headed for the mainline may contain a variety of tags documenting
+who played a hand in (or was at least aware of) their progress.  All of
+these tags have the form:
+
+	Something-done-by: Full name <email@address> [optional random stuff]
+
+These tags are:
+
+From: 	   	The original author of the patch.  This tag will ensure
+		that credit is properly given when somebody other than the
+		original author submits the patch.
+
+Signed-off-by:	A person adding a Signed-off-by tag is attesting that the
+		patch is, to the best of his or her knowledge, legally able
+		to be merged into the mainline and distributed under the
+		terms of the GNU General Public License, version 2.  See
+		the Developer's Certificate of Origin, found in
+		Documentation/SubmittingPatches, for the precise meaning of
+		Signed-off-by.  This tag assures upstream maintainers that
+		the provenance of the patch is known and allows the origin
+		of the patch to be reviewed should copyright questions
+		arise.
+
+Acked-by:	The person named (who should be an active developer in the
+		area addressed by the patch) is aware of the patch and has
+		no objection to its inclusion; it informs upstream
+		maintainers that a certain degree of consensus on the patch
+		has been achieved.  An Acked-by tag does not imply any
+		involvement in the development of the patch or that a
+		detailed review was done. 
+
+Reviewed-by:	The patch has been reviewed and found acceptable according
+		to the Reviewer's Statement as found at the bottom of this
+		file.  A Reviewed-by tag is a statement of opinion that the
+		patch is an appropriate modification of the kernel without
+		any remaining serious technical issues.  Any interested
+		reviewer (who has done the work) can offer a Reviewed-by
+		tag for a patch.  This tag serves to give credit to
+		reviewers and to inform maintainers of the degree of review
+		which has been done on the patch.
+
+Cc:		The person named was given the opportunity to comment on
+		the patch.  This is the only tag which might be added
+		without an explicit action by the person it names.  This
+		tag documents that potentially interested parties have been
+		included in the discussion.
+
+Tested-by:	The patch has been successfully tested (in some
+		environment) by the person named.  This tag informs
+		maintainers that some testing has been performed, provides
+		a means to locate testers for future patches, and ensures
+		credit for the testers.
+
+
+----
+
+Reviewer's statement of oversight, v0.02
+
+By offering my Reviewed-by: tag, I state that:
+
+ (a) I have carried out a technical review of this patch to evaluate its
+     appropriateness and readiness for inclusion into the mainline kernel. 
+
+ (b) Any problems, concerns, or questions relating to the patch have been
+     communicated back to the submitter.  I am satisfied with the
+     submitter's response to my comments.
+
+ (c) While there may be things that could be improved with this submission,
+     I believe that it is, at this time, (1) a worthwhile modification to
+     the kernel, and (2) free of known issues which would argue against its
+     inclusion.
+
+ (d) While I have reviewed the patch and believe it to be sound, I do not
+     (unless explicitly stated elsewhere) make any warranties or guarantees
+     that it will achieve its stated purpose or function properly in any
+     given situation.

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: Patch tags [was writeout stalls in current -git]
  2007-11-06 16:25                                             ` Patch tags [was writeout stalls in current -git] Jonathan Corbet
@ 2007-11-06 17:03                                               ` Balbir Singh
  2007-11-06 23:26                                               ` Adrian Bunk
  1 sibling, 0 replies; 61+ messages in thread
From: Balbir Singh @ 2007-11-06 17:03 UTC (permalink / raw)
  To: Jonathan Corbet; +Cc: Andrew Morton, linux-kernel

> This seems like as good an opportunity as any to toss my patch tags
> document out there one more time.  I still think it's a good idea to
> codify some sort of consensus on what these tags mean...
>
> jon
>

[snip]

> +By offering my Reviewed-by: tag, I state that:
> +
> + (a) I have carried out a technical review of this patch to evaluate its
> +     appropriateness and readiness for inclusion into the mainline kernel.
> +
> + (b) Any problems, concerns, or questions relating to the patch have been
> +     communicated back to the submitter.  I am satisfied with the
> +     submitter's response to my comments.
> +
> + (c) While there may be things that could be improved with this submission,
> +     I believe that it is, at this time, (1) a worthwhile modification to
> +     the kernel, and (2) free of known issues which would argue against its
> +     inclusion.
> +
> + (d) While I have reviewed the patch and believe it to be sound, I do not
> +     (unless explicitly stated elsewhere) make any warranties or guarantees
> +     that it will achieve its stated purpose or function properly in any
> +     given situation.

How about adding a Commented-on-by?

Initial version(s) that are not suitable or still shaping up are
commented-on by several people. A person who comments on one version
might not do a thorough review of the entire code, but through a
series of comments has contributed by pushing the developer in the
correct direction.

Balbir

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-06  4:25                                                   ` David Chinner
  2007-11-06  7:10                                                     ` Torsten Kaiser
@ 2007-11-06 19:01                                                     ` Peter Zijlstra
  2007-11-06 20:26                                                       ` Torsten Kaiser
  1 sibling, 1 reply; 61+ messages in thread
From: Peter Zijlstra @ 2007-11-06 19:01 UTC (permalink / raw)
  To: David Chinner
  Cc: Torsten Kaiser, Fengguang Wu, Maxim Levitsky, linux-kernel,
	Andrew Morton, linux-fsdevel, xfs

On Tue, 2007-11-06 at 15:25 +1100, David Chinner wrote:

> I'm struggling to understand what possibly changed in XFS or writeback that
> would lead to stalls like this, esp. as you appear to be removing files when
> the stalls occur. 

Just a crazy idea,..

Could there be a set_page_dirty() that doesn't have
balance_dirty_pages() call near? For example modifying meta data in
unlink?

Such a situation could lead to an excess of dirty pages and the next
call to balance_dirty_pages() would appear to stall, as it would
desperately try to get below the limit again.


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-06 19:01                                                     ` Peter Zijlstra
@ 2007-11-06 20:26                                                       ` Torsten Kaiser
  0 siblings, 0 replies; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-06 20:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: David Chinner, Fengguang Wu, Maxim Levitsky, linux-kernel,
	Andrew Morton, linux-fsdevel, xfs

On 11/6/07, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, 2007-11-06 at 15:25 +1100, David Chinner wrote:
>
> > I'm struggling to understand what possibly changed in XFS or writeback that
> > would lead to stalls like this, esp. as you appear to be removing files when
> > the stalls occur.
>
> Just a crazy idea,..
>
> Could there be a set_page_dirty() that doesn't have
> balance_dirty_pages() call near? For example modifying meta data in
> unlink?
>
> Such a situation could lead to an excess of dirty pages and the next
> call to balance_dirty_pages() would appear to stall, as it would
> desperately try to get below the limit again.

Only if accounting of the dirty pages is also broken.
In the unmerge testcase I see most of the time only <200kb of dirty
data in /proc/meminfo.

The system has 4Gb of RAM so I'm not sure if it should ever be valid
to stall even the emerge/install testcase.

Torsten

Now building a kernel with the skipped-pages-accounting-patch reverted...

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
       [not found]                                           ` <394340668.31055@ustc.edu.cn>
  2007-11-06  9:17                                             ` Fengguang Wu
@ 2007-11-06 21:53                                             ` Torsten Kaiser
  2007-11-06 23:31                                               ` David Chinner
  1 sibling, 1 reply; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-06 21:53 UTC (permalink / raw)
  To: Fengguang Wu
  Cc: Peter Zijlstra, Maxim Levitsky, linux-kernel, Andrew Morton,
	David Chinner, linux-fsdevel

On 11/6/07, Fengguang Wu <wfg@mail.ustc.edu.cn> wrote:
> ------------------------------------------------------
> Subject: writeback: remove pages_skipped accounting in __block_write_full_page()
> From: Fengguang Wu <wfg@mail.ustc.edu.cn>
>
> Miklos Szeredi <miklos@szeredi.hu> and me identified a writeback bug:
[snip]
>  fs/buffer.c                 |    1 -
>  fs/xfs/linux-2.6/xfs_aops.c |    5 ++---
>  2 files changed, 2 insertions(+), 4 deletions(-)

I have now tested v2.6.24-rc1-748-g2655e2c with the above patch reverted.
This does still stall.

On 11/6/07, David Chinner <dgc@sgi.com> wrote:
> Rather than vmstat, can you use something like iostat to show how busy your
> disks are?  i.e. are we seeing RMW cycles in the raid5 or some such issue.

Both "vmstat 10" and "iostat -x 10" output from this test:
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 2  0      0 3700592      0  85424    0    0    31    83  108  244  2  1 95  1
-> emerge reads something, don't know for sure what...
 1  0      0 3665352      0  87940    0    0   239     2  343  585  2  1 97  0
 0  0      0 3657728      0  91228    0    0   322    35  445  833  0  0 99  0
 1  0      0 3653136      0  94692    0    0   330    33  455  844  1  1 98  0
 0  0      0 3646836      0  97720    0    0   289     3  422  751  1  1 98  0
 0  0      0 3616468      0  99692    0    0   185    33  399  614  9  3 87  1
-> starts to remove the kernel tree
 0  0      0 3610452      0 102592    0    0   138  3598 1398 3945  3  6 90  1
 0  0      0 3607136      0 104548    0    0     2  5962 1919 6070  4  9 87  0
 0  0      0 3606636      0 105080    0    0     0  1539  810 2200  1  2 97  0
-> first stall 28 sec.
 0  0      0 3606592      0 105292    0    0     0   698  679 1390  0  1 99  0
 0  0      0 3606440      0 105532    0    0     0   658  690 1457  0  0 99  0
 0  0      0 3606068      0 106128    0    0     1  1780  947 1982  1  3 96  0
-> second stall 24 sec.
 0  0      0 3606036      0 106464    0    0     4   858  758 1457  0  1 98  0
 0  0      0 3605380      0 106872    0    0     0  1173  807 1880  1  2 97  0
 0  0      0 3605000      0 107748    0    0     1  2413 1103 2996  2  4 94  0
-> third stall 38 sec.
 0  0      0 3604488      0 108472    0    0    45   897  748 1577  0  1 98  0
 0  0      0 3604176      0 108764    0    0     0   824  752 1700  0  1 98  0
 0  0      0 3604012      0 108988    0    0     0   660  643 1237  0  1 99  0
 0  0      0 3608936      0 110120    0    0     1  3490 1232 3455  3  5 91  0
-> fourth stall 64 sec.
 1  0      0 3609060      0 110296    0    0     0   568  669 1222  0  1 99  0
 0  0      0 3609464      0 110496    0    0     0   604  638 1366  0  1 99  0
 0  0      0 3609244      0 110740    0    0     0   844  714 1282  0  1 99  0
 0  0      0 3609508      0 110912    0    0     0   552  584 1185  1  1 99  0
 2  0      0 3609436      0 111132    0    0     0   658  643 1442  0  1 99  0
 0  0      0 3609212      0 111348    0    0     0   714  637 1382  0  0 99  0
 0  0      0 3619132      0 110492    0    0   130  1086  736 1870  4  3 91  2
 0  0      0 3657016      0 115496    0    0   466   589  718 1367  1  1 98  0
-> emerge finishes, dirty data was <1Mb the whole time, stays now below 300kb
(btrace running...)
 0  0      0 3657844      0 115660    0    0     0   564  635 1226  1  1 99  0
 0  0      0 3658236      0 115840    0    0     0   582  600 1248  1  0 99  0
 0  0      0 3658296      0 116012    0    0     0   566  606 1232  1  1 99  0
 0  0      0 3657924      0 116212    0    0     0   688  596 1321  1  0 99  0
 0  0      0 3658252      0 116416    0    0     0   631  642 1356  1  0 98  0
 0  0      0 3658184      0 116592    0    0     0   566  575 1273  0  0 99  0
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 2  0      0 3658344      0 116772    0    0     0   649  606 1301  0  0 99  0
 0  0      0 3658548      0 116976    0    0     0   617  624 1345  0  0 99  0
 0  0      0 3659204      0 117160    0    0     0   550  576 1223  1  1 99  0
 0  0      0 3659944      0 117344    0    0     0   620  583 1272  0  0 99  0
 0  0      0 3660548      0 117540    0    0     0   605  611 1338  0  0 99  0
 0  0      0 3661236      0 117732    0    0     0   582  569 1275  0  0 99  0
 0  0      0 3662420      0 117888    0    0     0   590  571 1157  0  0 99  0
 0  0      0 3664324      0 118068    0    0     0   566  553 1222  1  1 99  0
 0  0      0 3665240      0 118168    0    0     0   401  574  862  0  0 99  0
 0  0      0 3666984      0 118280    0    0     0   454  574  958  1  1 99  0
 0  0      0 3668664      0 118400    0    0     0   396  559  946  0  0 99  0
 0  0      0 3670628      0 118496    0    0     0   296  495  784  0  0 99  0
 0  0      0 3671316      0 118496    0    0     0    36  334  307  0  0 99  0
-> disks go idle

I also saved the btrace output, but that is even with bzip2 ~1.6Mb.

Summary from btrace
Total (253,0):
 Reads Queued:       5,385,   21,540KiB  Writes Queued:      91,076,  362,640KiB
 Read Dispatches:        0,        0KiB  Write Dispatches:        0,        0KiB
 Reads Requeued:         0               Writes Requeued:         0
 Reads Completed:    5,385,   21,540KiB  Writes Completed:   91,076,  362,640KiB
 Read Merges:            0,        0KiB  Write Merges:            0,        0KiB
 IO unplugs:         8,883               Timer unplugs:           0

Throughput (R/W): 38KiB/s / 654KiB/s
Events (253,0): 201,805 entries
Skips: 0 forward (0 -   0.0%)

The last 20% of the btrace looks more or less completely like this, no
other programs do any IO...

253,0    3   104626   526.293450729   974  C  WS 79344288 + 8 [0]
253,0    3   104627   526.293455078   974  C  WS 79344296 + 8 [0]
253,0    1    36469   444.513863133  1068  Q  WS 154998480 + 8 [xfssyncd]
253,0    1    36470   444.513863135  1068  Q  WS 154998488 + 8 [xfssyncd]
253,0    1    36471   444.523967430  1068  Q  WS 117078784 + 8 [xfssyncd]
253,0    1    36472   444.523970097  1068  Q  WS 117078792 + 8 [xfssyncd]
253,0    1    36473   444.548753821  1068  Q  WS 117078784 + 8 [xfssyncd]
253,0    1    36474   444.548756324  1068  Q  WS 117078792 + 8 [xfssyncd]
253,0    1    36475   444.553960214  1068  Q  WS 195314144 + 8 [xfssyncd]
253,0    1    36476   444.553962765  1068  Q  WS 195314152 + 8 [xfssyncd]
253,0    3   104628   526.310490373   974  C  WS 154998480 + 8 [0]
253,0    3   104629   526.310490374   974  C  WS 154998488 + 8 [0]
253,0    3   104630   526.310490386   974  C  WS 154998480 + 8 [0]
253,0    3   104631   526.310490387   974  C  WS 154998488 + 8 [0]
253,0    3   104632   526.310565814   974  C  WS 117078784 + 8 [0]
253,0    3   104633   526.310570195   974  C  WS 117078792 + 8 [0]
253,0    3   104634   526.313450024   974  C  WS 117078784 + 8 [0]
253,0    3   104635   526.313454317   974  C  WS 117078792 + 8 [0]
253,0    1    36477   444.583070774  1068  Q  WS 195314144 + 8 [xfssyncd]
253,0    1    36478   444.583075517  1068  Q  WS 195314152 + 8 [xfssyncd]
253,0    1    36479   444.583954077  1068  Q  WS 233141680 + 8 [xfssyncd]
253,0    1    36480   444.583956804  1068  Q  WS 233141688 + 8 [xfssyncd]
253,0    1    36481   444.619241615  1068  Q  WS 233165296 + 8 [xfssyncd]
253,0    1    36482   444.619247992  1068  Q  WS 233165304 + 8 [xfssyncd]
253,0    3   104636   526.320490406   974  C  WS 195314144 + 8 [0]
253,0    3   104637   526.320490407   974  C  WS 195314152 + 8 [0]
253,0    3   104638   526.320490419   974  C  WS 195314144 + 8 [0]
253,0    3   104639   526.320490420   974  C  WS 195314152 + 8 [0]
253,0    3   104640   526.348720498   974  C  WS 233141680 + 8 [0]
253,0    3   104641   526.348724614   974  C  WS 233141688 + 8 [0]
253,0    1    36483   444.643863141  1068  Q  WS 272297440 + 8 [xfssyncd]
253,0    1    36484   444.643863143  1068  Q  WS 272297448 + 8 [xfssyncd]
253,0    1    36485   444.675408559  1068  Q  WS 272297440 + 8 [xfssyncd]
253,0    1    36486   444.675412236  1068  Q  WS 272297448 + 8 [xfssyncd]

iostat -x 10 output follows, each line from the above vmstat should
correspond to one iostat output

Linux 2.6.24-rc1 (treogen) 	11/06/07

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           2.27    0.00    1.13    1.41    0.00   95.18

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda              14.46    34.81    7.63   15.11   176.60   418.68
26.17     0.53   23.07   5.59  12.71
sdb              14.54    34.60    7.51   14.91   176.01   415.36
26.38     0.43   19.29   5.00  11.20
sdc              14.62    34.50    7.55   15.29   177.12   417.73
26.04     0.47   20.42   5.31  12.12
md1               0.00     0.00   31.99   80.06   254.70   636.20
7.95     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.04     0.00
8.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00   31.88   80.06   254.53   636.20
7.96    24.99  223.19   2.56  28.68
sdd               0.46     0.00    0.10    0.00     0.88     0.00
8.99     0.00    5.25   1.91   0.02

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           1.85    0.00    0.55    0.19    0.00   97.41

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda              16.80     0.00    4.00    0.80   166.40    11.20
37.00     0.07   13.96  12.29   5.90
sdb              19.40     0.00    4.50    0.70   191.20    10.40
38.77     0.07   13.27  10.77   5.60
sdc              18.20     0.10    6.50    1.30   197.60    14.50
27.19     0.11   13.85  11.41   8.90
md1               0.00     0.00   69.50    0.20   556.00     0.50
7.98     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00   69.50    0.20   556.00     0.50
7.98     0.67    9.53   2.38  16.60
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.45    0.00    0.47    0.05    0.00   99.03

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               6.70     0.10   21.90    1.10   228.80    15.20
10.61     0.43   18.70  16.70  38.40
sdb               6.00     0.10   19.30    1.00   201.60    14.40
10.64     0.33   16.40  15.22  30.90
sdc               5.70     0.20   21.50    1.50   217.60    49.30
11.60     0.40   17.22  15.13  34.80
md1               0.00     0.00   81.10    0.70   648.80     5.60
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00   81.10    0.70   648.80     5.60
8.00     1.61   19.73  12.11  99.10
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.94    0.00    0.79    0.02    0.00   98.24

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               4.70     0.00   21.10    1.50   206.40    22.40
10.12     0.40   17.65  16.11  36.40
sdb               6.20     0.10   20.80    1.50   216.00    23.20
10.73     0.35   15.70  13.50  30.10
sdc               5.50     0.10   23.60    2.40   232.80    57.00
11.15     0.46   17.65  14.96  38.90
md1               0.00     0.00   81.80    0.40   654.40     2.50
7.99     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00   81.80    0.40   654.40     2.50
7.99     1.55   18.84  11.98  98.50
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           1.63    0.00    1.09    0.00    0.00   97.28

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               5.30     0.00   16.60    0.20   175.20     3.20
10.62     0.34   20.30  18.93  31.80
sdb               6.50     0.00   19.00    0.20   204.80     3.20
10.83     0.35   18.39  17.29  33.20
sdc               5.60     0.00   17.50    0.30   184.80     4.00
10.61     0.34   19.33  18.48  32.90
md1               0.00     0.00   70.50    0.00   564.00     0.00
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00   70.50    0.00   564.00     0.00
8.00     1.43   20.30  13.49  95.10
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           9.38    0.00    3.45    1.55    0.00   85.62

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               9.30     1.60   15.50    2.20   198.40    40.00
13.47     0.27   15.08  12.71  22.50
sdb               8.40     3.50   12.00    1.80   163.20    52.00
15.59     0.19   13.62  12.54  17.30
sdc               8.30     4.10   13.70    2.90   176.00   118.90
17.77     0.24   14.58  11.93  19.80
md1               0.00     0.00   61.00    5.50   488.00    42.30
7.97     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00   61.00    5.50   488.00    42.30
7.97     1.19   17.80   8.00  53.20
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           3.26    0.00    8.36    0.07    0.00   88.30

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda             207.00   584.40  181.70   79.40  3109.60  5391.20
32.56     2.28    8.74   2.42  63.10
sdb             209.60   584.30  184.20   77.50  3150.40  5373.60
32.57     2.02    7.74   1.93  50.50
sdc             195.20   589.40  198.20   80.10  3147.20  5760.80
32.01     2.37    8.51   2.33  64.80
md1               0.00     0.00   12.00 1182.20    96.00  9456.20
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00   12.00 1182.80    96.00  9461.00
8.00    61.79   51.70   0.83  99.20
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           3.24    0.00    6.90    0.02    0.00   89.84

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda             203.60   541.60  163.40   84.60  2936.80  5101.60
32.41     5.66   22.74   3.05  75.70
sdb             201.10   533.20  165.90   83.50  2936.80  5028.00
31.94     5.23   20.77   2.61  65.20
sdc             201.00   540.30  164.50   89.50  2924.00  5346.30
32.56     5.77   22.71   3.00  76.30
md1               0.00     0.00    0.50 1115.30     4.00  8877.30
7.96     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.50 1114.70     4.00  8872.50
7.96    93.14   81.84   0.89  99.80
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           1.16    0.00    2.32    0.00    0.00   96.52

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda              56.60   154.10   32.90   54.90   716.00  1726.40
27.82     0.45    5.26   2.79  24.50
sdb              57.80   161.00   35.60   57.10   747.20  1801.60
27.50     0.48    5.65   2.80  26.00
sdc              58.00   162.30   32.00   56.70   720.00  1808.10
28.50     0.44    4.88   2.82  25.00
md1               0.00     0.00    0.00  355.90     0.00  2842.30
7.99     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  355.90     0.00  2842.30
7.99     9.02   30.64   2.71  96.50
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.31    0.00    0.52    0.00    0.00   99.17

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.70    45.00    6.10   71.40    54.40   974.40
13.27     3.15   40.67   3.50  27.10
sdb               1.90    20.90    7.50   46.70    75.20   584.00
12.16     1.64   30.18   3.69  20.00
sdc               1.80    35.50    6.80   62.40    68.80  1055.20
16.24     1.82   26.30   3.16  21.90
md1               0.00     0.00    0.00  135.20     0.00  1038.20
7.68     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  135.20     0.00  1038.20
7.68    14.41  106.61   7.32  99.00
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.50    0.00    0.36    0.00    0.00   99.14

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               2.10    48.90    3.10   64.00    41.60   952.00
14.81     0.64    9.60   2.91  19.50
sdb               2.60    26.20    3.90   40.80    52.00   584.80
14.25     0.52   11.59   3.31  14.80
sdc               2.20    55.60    3.40   72.90    44.00  1076.90
14.69     0.67    8.73   2.44  18.60
md1               0.00     0.00    0.00  144.80     0.00  1158.40
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  144.80     0.00  1158.40
8.00     5.74   39.59   6.88  99.60
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           1.46    0.00    3.01    0.00    0.00   95.53

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda              61.10   183.80   39.10   84.70   801.60  2204.80
24.28     0.82    6.62   2.05  25.40
sdb              57.80   180.00   42.70   77.70   804.00  2113.60
24.23     0.92    7.66   1.87  22.50
sdc              57.40   182.70   41.60   85.80   792.80  2200.10
23.49     1.11    8.74   2.06  26.20
md1               0.00     0.00    1.20  438.50     9.60  3507.10
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    1.20  438.50     9.60  3507.10
8.00    15.63   35.55   2.26  99.40
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.43    0.00    1.07    0.21    0.00   98.29

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               1.90    22.00    7.50   42.20    75.20   557.60
12.73     2.54   51.07   4.47  22.20
sdb               1.10    58.70    6.50   82.00    60.80  1169.60
13.90     2.69   30.36   2.49  22.00
sdc               0.90    59.50    6.90   83.50    62.40  1409.60
16.28     3.13   34.67   2.78  25.10
md1               0.00     0.00    0.10  168.70     0.80  1305.80
7.74     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.10  168.70     0.80  1305.80
7.74    15.74   93.27   5.90  99.60
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           1.79    0.00    3.94    0.00    0.00   94.27

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda              80.40   200.50   43.10   64.10   976.00  2158.40
29.24     0.39    3.64   1.73  18.50
sdb              77.30   232.60   44.80   93.80   968.80  2655.20
26.15     0.34    2.47   1.17  16.20
sdc              67.00   244.30   52.60  103.50   944.80  2826.50
24.16     0.39    2.45   1.13  17.60
md1               0.00     0.00    0.20  532.90     1.60  4260.50
7.99     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.20  533.70     1.60  4266.90
7.99    11.08   20.71   1.87  99.90
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           1.04    0.00    1.40    0.00    0.00   97.55

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda              40.80   138.90   18.70   77.50   488.00  1768.00
23.45     0.46    4.76   1.57  15.10
sdb              41.30   115.90   19.80   58.80   496.80  1436.00
24.59     0.83   10.61   2.09  16.40
sdc              35.20   149.50   25.90   89.40   500.80  1952.10
21.27     1.01    8.77   1.59  18.30
md1               0.00     0.00    0.10  335.50     0.80  2681.70
7.99     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.10  334.70     0.80  2675.30
7.99     9.89   29.61   2.94  98.30
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.44    0.00    1.08    0.49    0.00   97.99

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               4.50    37.20    9.60   63.60   112.80   853.60
13.20     1.83   25.01   2.90  21.20
sdb               3.80    58.90    9.90   82.40   109.60  1177.60
13.95     1.68   18.15   2.36  21.80
sdc               3.90    49.30    9.20   72.70   104.80  1327.20
17.48     2.09   25.53   2.94  24.10
md1               0.00     0.00   11.20  176.20    89.60  1362.00
7.75     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00   11.20  176.20    89.60  1362.00
7.75    10.78   57.52   5.34 100.00
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.37    0.00    1.26    0.00    0.00   98.37

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               1.70    48.70    2.10   69.40    30.40   983.20
14.18     0.22    3.02   1.48  10.60
sdb               2.40    55.90    2.80   76.20    41.60  1095.20
14.39     0.24    3.05   1.28  10.10
sdc               1.20    57.50    1.50   80.70    21.60  1143.50
14.17     0.23    2.76   1.24  10.20
md1               0.00     0.00    0.00  186.70     0.00  1493.60
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  186.70     0.00  1493.60
8.00     2.63   14.10   5.33  99.50
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.18    0.00    0.60    0.02    0.00   99.19

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               3.50    51.10    5.10   68.70    68.80  1005.60
14.56     1.82   24.61   4.32  31.90
sdb               4.10    46.00    6.50   62.50    84.80   915.20
14.49     1.29   18.68   3.74  25.80
sdc               4.90    32.40    7.00   47.70    95.20   688.90
14.33     1.31   23.97   4.26  23.30
md1               0.00     0.00    0.00  145.00     0.00  1160.00
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  145.00     0.00  1160.00
8.00    12.36   85.27   6.59  95.50
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           3.26    0.00    5.29    0.00    0.00   91.45

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda             136.10   375.40  101.70   75.40  1902.40  3672.00
31.48     0.57    3.24   1.87  33.20
sdb             150.60   372.90   88.50   77.50  1912.80  3664.80
33.60     0.57    3.43   1.90  31.60
sdc             141.30   388.80   95.20   88.40  1892.00  4198.40
33.17     0.69    3.76   1.98  36.40
md1               0.00     0.00    0.30  813.90     2.40  6509.20
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.30  813.90     2.40  6509.20
8.00    22.48   27.60   1.22  99.20
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.23    0.00    0.54    0.00    0.00   99.22

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               1.60    33.50    7.80   55.20    75.20   759.20
13.24     2.85   45.32   4.52  28.50
sdb               1.40    36.20    7.40   58.30    70.40   805.60
13.33     3.34   50.84   4.35  28.60
sdc               1.10    32.20    7.10   53.40    65.60   733.90
13.21     3.20   52.89   4.60  27.80
md1               0.00     0.00    0.00  128.60     0.00   984.00
7.65     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  128.60     0.00   984.00
7.65    19.93  154.97   7.60  97.80
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.42    0.00    0.56    0.00    0.00   99.02

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               1.10    54.10    1.30   61.60    19.20   967.20
15.68     0.29    4.55   2.81  17.70
sdb               2.30    26.40    2.40   34.90    37.60   532.00
15.27     0.27    7.10   4.29  16.00
sdc               0.90    50.00    1.00   59.20    15.20   914.50
15.44     0.27    4.47   2.59  15.60
md1               0.00     0.00    0.00  134.80     0.00  1078.40
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  134.80     0.00  1078.40
8.00     2.40   17.86   7.31  98.60
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.45    0.00    0.66    0.00    0.00   98.88

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               4.70    56.50    7.40   75.90    96.80  1116.00
14.56     1.67   20.07   4.23  35.20
sdb               4.20    42.70    6.80   62.70    88.00   900.00
14.22     1.30   18.75   3.68  25.60
sdc               5.20    52.90    8.10   71.50   106.40  1168.80
16.02     1.73   21.68   4.48  35.70
md1               0.00     0.00    0.00  170.20     0.00  1361.60
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  170.20     0.00  1361.60
8.00    17.84  104.81   5.80  98.80
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.47    0.00    0.66    0.00    0.00   98.87

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               1.30    48.20    1.30   53.80    20.00   856.80
15.91     0.28    5.08   3.36  18.50
sdb               1.60    45.40    1.60   51.50    25.60   816.00
15.85     0.28    5.24   3.15  16.70
sdc               1.60    41.80    1.70   47.60    26.40   755.70
15.86     0.28    5.58   3.39  16.70
md1               0.00     0.00    0.00  136.40     0.00  1091.20
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  136.40     0.00  1091.20
8.00     2.48   18.15   7.17  97.80
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.45    0.00    0.78    0.00    0.00   98.77

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.70    48.00    0.80   52.30    12.80   833.60
15.94     0.22    4.16   2.34  12.40
sdb               1.50    38.20    1.50   42.90    24.00   680.00
15.86     0.19    4.30   2.36  10.50
sdc               0.40    60.20    0.40   64.80     6.40  1030.50
15.90     0.29    4.51   2.55  16.60
md1               0.00     0.00    0.00  147.20     0.00  1177.60
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  147.20     0.00  1177.60
8.00     2.34   15.89   6.72  98.90
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.42    0.00    0.37    0.00    0.00   99.21

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.60    53.00    0.70   63.30    10.40   971.20
15.34     0.26    4.05   2.64  16.90
sdb               1.00    38.80    1.10   53.90    16.80   782.40
14.53     0.50    9.09   3.55  19.50
sdc               0.90    40.50    1.00   51.60    15.20   906.40
17.52     0.24    4.54   2.68  14.10
md1               0.00     0.00    0.00  142.60     0.00  1140.80
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  142.60     0.00  1140.80
8.00     2.33   16.33   6.90  98.40
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           4.39    0.00    3.08    1.74    0.00   90.79

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda              42.10   103.40   17.80   63.40   478.40  1374.40
22.82     0.84   10.38   4.25  34.50
sdb              42.80    95.10   17.20   48.30   480.00  1189.60
25.49     0.45    6.90   3.97  26.00
sdc              45.90   100.60   18.60   57.50   516.00  1304.90
23.93     0.60    7.83   4.34  33.00
md1               0.00     0.00   47.60  252.40   380.80  2017.10
7.99     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00   47.60  252.40   380.80  2017.10
7.99     7.29   24.30   3.28  98.50
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.99    0.00    1.22    0.00    0.00   97.79

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda              15.50    34.60   28.70   56.50   354.40   780.80
13.32     5.49   60.46   6.27  53.40
sdb              14.70    32.80   28.50   53.80   345.60   745.60
13.26     3.42   41.51   5.55  45.70
sdc              14.00    23.00   27.00   44.30   328.00   590.50
12.88     3.54   49.71   6.80  48.50
md1               0.00     0.00  101.40  119.50   811.20   912.60
7.80     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00  101.40  119.50   811.20   912.60
7.80    22.75   32.05   4.51  99.70
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.70    0.00    0.68    0.00    0.00   98.62

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               1.50    35.50    2.20   44.10    29.60   682.40
15.38     0.29   13.59   3.82  17.70
sdb               1.50    42.50    2.10   51.90    28.80   799.20
15.33     0.29    5.39   3.20  17.30
sdc               1.60    36.90    1.90   47.70    28.00   908.80
18.89     0.29    5.87   3.55  17.60
md1               0.00     0.00    0.00  116.60     0.00   932.10
7.99     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  116.60     0.00   932.10
7.99     2.73  157.86   8.45  98.50
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.76    0.00    0.47    0.00    0.00   98.77

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.40    49.80    0.40   65.00     6.40   961.60
14.80     0.23    3.46   2.16  14.10
sdb               1.20    28.90    1.50   39.20    21.60   588.00
14.98     0.18    4.32   2.87  11.70
sdc               0.80    43.30    1.10   53.80    15.20   819.50
15.20     0.24    4.28   2.84  15.60
md1               0.00     0.00    0.00  131.80     0.00  1054.40
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  131.80     0.00  1054.40
8.00     2.00   15.14   7.48  98.60
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.70    0.00    0.75    0.00    0.00   98.55

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.90    43.80    1.60   64.30    20.00   904.80
14.03     0.30    4.63   2.47  16.30
sdb               1.20    30.30    1.70   50.70    23.20   688.00
13.57     0.25    4.85   2.67  14.00
sdc               0.90    28.50    1.90   46.50    22.40   639.30
13.67     0.30    6.28   3.33  16.10
md1               0.00     0.00    0.00  124.40     0.00   994.00
7.99     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  124.40     0.00   994.00
7.99     2.19   17.60   7.97  99.10
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.74    0.00    0.34    0.00    0.00   98.92

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.90    50.40    1.20   62.40    16.80   942.40
15.08     0.22    3.49   1.76  11.20
sdb               0.30    53.50    0.40   64.50     5.60   984.00
15.25     0.18    2.82   1.48   9.60
sdc               1.60    34.30    2.00   47.20    28.80   801.60
16.88     0.25    5.04   2.60  12.80
md1               0.00     0.00    0.00  148.40     0.00  1185.80
7.99     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  148.40     0.00  1185.80
7.99     2.11   14.23   6.70  99.50
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           1.18    0.00    0.35    0.00    0.00   98.47

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.50    47.80    0.70   60.00     9.60   892.80
14.87     0.20    3.29   1.86  11.30
sdb               1.10    38.10    1.30   48.40    19.20   722.40
14.92     0.17    3.48   2.15  10.70
sdc               0.80    41.90    0.80   57.00    12.80   821.10
14.43     0.21    3.55   1.87  10.80
md1               0.00     0.00    0.00  140.80     0.00  1126.40
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  140.80     0.00  1126.40
8.00     1.98   14.06   7.03  99.00
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.38    0.00    0.38    0.00    0.00   99.25

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               1.40    42.10    1.50   55.20    23.20   808.80
14.67     0.18    3.26   1.94  11.00
sdb               0.70    39.90    0.80   52.10    12.00   766.40
14.71     0.20    3.71   2.04  10.80
sdc               1.20    38.40    1.40   49.20    20.80   730.50
14.85     0.22    4.45   2.29  11.60
md1               0.00     0.00    0.00  132.60     0.00  1060.80
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  132.60     0.00  1060.80
8.00     1.97   14.83   7.39  98.00
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.33    0.00    0.49    0.00    0.00   99.18

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.80    40.70    1.10   47.20    15.20   735.20
15.54     0.20    4.12   2.28  11.00
sdb               0.50    47.00    0.90   53.60    11.20   836.80
15.56     0.22    4.04   2.51  13.70
sdc               0.90    40.80    1.00   48.40    15.20   857.60
17.67     0.21    4.23   2.31  11.40
md1               0.00     0.00    0.00  132.60     0.00  1060.80
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  132.60     0.00  1060.80
8.00     1.97   14.85   7.44  98.60
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.40    0.00    0.40    0.00    0.00   99.20

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.90    42.90    1.00   53.60    15.20   800.80
14.95     0.21    3.75   1.96  10.70
sdb               1.10    42.10    1.10   50.80    17.60   772.00
15.21     0.18    3.56   2.02  10.50
sdc               0.70    50.20    0.80   60.20    12.00   911.50
15.14     0.20    3.25   1.90  11.60
md1               0.00     0.00    0.00  144.40     0.00  1155.20
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  144.40     0.00  1155.20
8.00     1.98   13.70   6.85  98.90
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.75    0.00    0.71    0.00    0.00   98.54

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.70    45.00    0.70   49.70    11.20   783.20
15.76     0.18    3.63   2.12  10.70
sdb               0.70    38.00    0.70   43.20    11.20   675.20
15.64     0.17    3.78   2.10   9.20
sdc               1.00    40.20    1.00   46.20    16.00   716.10
15.51     0.20    4.13   2.27  10.70
md1               0.00     0.00    0.00  126.20     0.00  1009.60
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  126.20     0.00  1009.60
8.00     1.96   15.50   7.75  97.80
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.42    0.00    0.44    0.00    0.00   99.14

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.50    41.50    0.90   53.80    11.20   780.00
14.46     0.20    3.62   2.21  12.10
sdb               0.70    39.80    0.90   60.40    12.80   819.20
13.57     0.24    3.92   1.89  11.60
sdc               0.90    32.70    1.30   46.60    17.60   763.20
16.30     0.23    4.84   2.61  12.50
md1               0.00     0.00    0.00  133.80     0.00  1070.40
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  133.80     0.00  1070.40
8.00     1.97   14.73   7.37  98.60
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.40    0.00    0.35    0.00    0.00   99.25

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.80    43.20    1.00   55.50    14.40   809.60
14.58     0.20    3.52   1.98  11.20
sdb               0.60    45.20    0.80   56.30    11.20   832.00
14.77     0.20    3.43   1.89  10.80
sdc               1.10    39.80    1.10   53.70    17.60   767.30
14.32     0.20    3.72   2.04  11.20
md1               0.00     0.00    0.00  143.00     0.00  1144.00
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  143.00     0.00  1144.00
8.00     1.98   13.86   6.94  99.20
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.40    0.00    0.47    0.00    0.00   99.14

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               1.00    44.80    1.00   48.70    16.00   773.60
15.89     0.20    4.08   2.01  10.00
sdb               0.90    49.00    0.90   52.90    14.40   840.80
15.90     0.22    4.01   2.23  12.00
sdc               1.30    39.20    1.30   44.10    20.80   691.30
15.69     0.23    5.11   3.02  13.70
md1               0.00     0.00    0.00  134.40     0.00  1075.20
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  134.40     0.00  1075.20
8.00     1.95   14.51   7.25  97.40
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.45    0.00    0.45    0.00    0.00   99.10

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               1.40    35.30    1.50   44.00    23.20   679.20
15.44     0.24    5.34   3.21  14.60
sdb               0.30    51.10    0.50   59.00     6.40   925.60
15.66     0.24    4.12   2.69  16.00
sdc               1.10    29.50    1.40   39.20    20.00   705.60
17.87     0.24    5.94   3.52  14.30
md1               0.00     0.00    0.00  120.40     0.00   963.20
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  120.40     0.00   963.20
8.00     1.99   16.53   8.27  99.60
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.60    0.00    0.48    0.00    0.00   98.92

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.50    47.20    0.50   51.50     8.00   819.20
15.91     0.27    5.21   3.75  19.50
sdb               1.40    45.00    1.50   49.00    23.20   781.60
15.94     0.23    4.48   2.95  14.90
sdc               1.60    30.20    1.70   34.40    26.40   545.70
15.85     0.18    5.01   2.99  10.80
md1               0.00     0.00    0.00  123.00     0.00   984.00
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00  123.00     0.00   984.00
8.00     1.96   15.92   7.95  97.80
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.50    0.00    0.47    0.00    0.00   99.03

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               5.10    21.60    6.30   36.80    91.20   548.00
14.83     0.46   10.56   8.12  35.00
sdb               6.10    20.80    7.10   35.90   105.60   534.40
14.88     0.38    8.79   6.67  28.70
sdc               3.80    22.80    4.70   38.70    68.00   572.10
14.75     0.43    9.86   7.26  31.50
md1               0.00     0.00    0.00   73.00     0.00   584.00
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00   73.00     0.00   584.00
8.00     1.98   27.15  13.62  99.40
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.78    0.00    0.60    0.00    0.00   98.62

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               5.50    27.10    6.10   37.10    92.80   569.60
15.33     0.39    9.03   6.18  26.70
sdb               7.20    23.60    8.10   33.50   122.40   513.60
15.29     0.33    7.96   5.84  24.30
sdc               7.00    25.80    7.90   35.70   119.20   628.80
17.16     0.42    9.59   7.02  30.60
md1               0.00     0.00    0.00   80.40     0.00   643.20
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00   80.40     0.00   643.20
8.00     1.99   24.70  12.39  99.60
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.21    0.00    0.37    0.00    0.00   99.42

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               4.70    25.70    5.60   34.80    82.40   524.00
15.01     0.29    7.13   5.25  21.20
sdb               4.60    26.20    5.40   35.80    80.00   535.20
14.93     0.28    6.77   4.59  18.90
sdc               4.90    25.70    5.70   35.60    84.80   529.10
14.86     0.35    8.38   6.54  27.00
md1               0.00     0.00    0.00   84.60     0.00   676.80
8.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00   84.60     0.00   676.80
8.00     1.98   23.43  11.69  98.90
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.30    0.00    0.28    0.00    0.00   99.41

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               2.20    17.20    2.40   23.40    36.80   357.60
15.29     0.20    7.83   5.97  15.40
sdb               2.10    15.20    2.50   20.70    36.80   320.00
15.38     0.16    7.03   4.91  11.40
sdc               3.90    13.00    4.20   19.30    64.80   290.50
15.12     0.26   10.94   8.30  19.50
md1               0.00     0.00    0.00   47.90     0.00   382.60
7.99     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00   47.90     0.00   382.60
7.99     1.12   23.36  11.77  56.40
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           0.39    0.00    0.24    0.02    0.00   99.35

Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s
avgrq-sz avgqu-sz   await  svctm  %util
sda               0.00     0.00    0.00    0.90     0.00    14.40
16.00     0.03   28.89   7.78   0.70
sdb               0.00     0.00    0.00    0.90     0.00    14.40
16.00     0.03   31.11   7.78   0.70
sdc               0.00     0.10    0.00    1.30     0.00    64.80
49.85     0.06   44.62  10.77   1.40
md1               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
md0               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
dm-0              0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00
sdd               0.00     0.00    0.00    0.00     0.00     0.00
0.00     0.00    0.00   0.00   0.00

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: Patch tags [was writeout stalls in current -git]
  2007-11-06 16:25                                             ` Patch tags [was writeout stalls in current -git] Jonathan Corbet
  2007-11-06 17:03                                               ` Balbir Singh
@ 2007-11-06 23:26                                               ` Adrian Bunk
  2007-11-09 16:10                                                 ` Jonathan Corbet
  1 sibling, 1 reply; 61+ messages in thread
From: Adrian Bunk @ 2007-11-06 23:26 UTC (permalink / raw)
  To: Jonathan Corbet; +Cc: Andrew Morton, linux-kernel

On Tue, Nov 06, 2007 at 09:25:12AM -0700, Jonathan Corbet wrote:
> Andrew wrote:
> 
> > > Reviewed-by: Fengguang Wu <wfg@mail.ustc.edu.cn> 
> > 
> > I would prefer Tested-by: :(
> 
> This seems like as good an opportunity as any to toss my patch tags
> document out there one more time.  I still think it's a good idea to
> codify some sort of consensus on what these tags mean... 

What's missing is a definition of which of them are formal tags that must 
be explicitly given (look at point 13 in SubmittingPatches).

Signed-off-by: and Reviewed-by: are the formal tags someone must have 
explicitly given and that correspond to some statement.

OTOH, I can translate a "sounds fine" or "works for me" someone else 
gave me into an Acked-by: resp. Tested-by: tag.

> jon
>...
> --- /dev/null
> +++ b/Documentation/patch-tags
> @@ -0,0 +1,76 @@
> +Patches headed for the mainline may contain a variety of tags documenting
> +who played a hand in (or was at least aware of) their progress.  All of
> +these tags have the form:
> +
> +	Something-done-by: Full name <email@address> [optional random stuff]
> +
> +These tags are:
> +
> +From: 	   	The original author of the patch.  This tag will ensure
> +		that credit is properly given when somebody other than the
> +		original author submits the patch.
> +
> +Signed-off-by:	A person adding a Signed-off-by tag is attesting that the
> +		patch is, to the best of his or her knowledge, legally able
> +		to be merged into the mainline and distributed under the
> +		terms of the GNU General Public License, version 2.  See
> +		the Developer's Certificate of Origin, found in
> +		Documentation/SubmittingPatches, for the precise meaning of
> +		Signed-off-by.  This tag assures upstream maintainers that
> +		the provenance of the patch is known and allows the origin
> +		of the patch to be reviewed should copyright questions
> +		arise.
> +
> +Acked-by:	The person named (who should be an active developer in the
> +		area addressed by the patch) is aware of the patch and has
> +		no objection to its inclusion; it informs upstream
> +		maintainers that a certain degree of consensus on the patch
> +		has been achieved.  An Acked-by tag does not imply any
> +		involvement in the development of the patch or that a
> +		detailed review was done. 
> +
> +Reviewed-by:	The patch has been reviewed and found acceptable according
> +		to the Reviewer's Statement as found at the bottom of this
> +		file.  A Reviewed-by tag is a statement of opinion that the
> +		patch is an appropriate modification of the kernel without
> +		any remaining serious technical issues.  Any interested
> +		reviewer (who has done the work) can offer a Reviewed-by
> +		tag for a patch.  This tag serves to give credit to
> +		reviewers and to inform maintainers of the degree of review
> +		which has been done on the patch.
> +
> +Cc:		The person named was given the opportunity to comment on
> +		the patch.  This is the only tag which might be added
> +		without an explicit action by the person it names.  This
> +		tag documents that potentially interested parties have been
> +		included in the discussion.
> +
> +Tested-by:	The patch has been successfully tested (in some
> +		environment) by the person named.  This tag informs
> +		maintainers that some testing has been performed, provides
> +		a means to locate testers for future patches, and ensures
> +		credit for the testers.
> +
> +
> +----
> +
> +Reviewer's statement of oversight, v0.02
> +
> +By offering my Reviewed-by: tag, I state that:
> +
> + (a) I have carried out a technical review of this patch to evaluate its
> +     appropriateness and readiness for inclusion into the mainline kernel. 
> +
> + (b) Any problems, concerns, or questions relating to the patch have been
> +     communicated back to the submitter.  I am satisfied with the
> +     submitter's response to my comments.
> +
> + (c) While there may be things that could be improved with this submission,
> +     I believe that it is, at this time, (1) a worthwhile modification to
> +     the kernel, and (2) free of known issues which would argue against its
> +     inclusion.
> +
> + (d) While I have reviewed the patch and believe it to be sound, I do not
> +     (unless explicitly stated elsewhere) make any warranties or guarantees
> +     that it will achieve its stated purpose or function properly in any
> +     given situation.

cu
Adrian

-- 

       "Is there not promise of rain?" Ling Tan asked suddenly out
        of the darkness. There had been need of rain for many days.
       "Only a promise," Lao Er said.
                                       Pearl S. Buck - Dragon Seed


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-06 21:53                                             ` Torsten Kaiser
@ 2007-11-06 23:31                                               ` David Chinner
  2007-11-07  2:13                                                 ` David Chinner
  0 siblings, 1 reply; 61+ messages in thread
From: David Chinner @ 2007-11-06 23:31 UTC (permalink / raw)
  To: Torsten Kaiser
  Cc: Fengguang Wu, Peter Zijlstra, Maxim Levitsky, linux-kernel,
	Andrew Morton, David Chinner, linux-fsdevel

On Tue, Nov 06, 2007 at 10:53:25PM +0100, Torsten Kaiser wrote:
> On 11/6/07, David Chinner <dgc@sgi.com> wrote:
> > Rather than vmstat, can you use something like iostat to show how busy your
> > disks are?  i.e. are we seeing RMW cycles in the raid5 or some such issue.
> 
> Both "vmstat 10" and "iostat -x 10" output from this test:
> procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
>  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
>  2  0      0 3700592      0  85424    0    0    31    83  108  244  2  1 95  1
> -> emerge reads something, don't know for sure what...
>  1  0      0 3665352      0  87940    0    0   239     2  343  585  2  1 97  0
....
> 
> The last 20% of the btrace look more or less completely like this, no
> other programs do any IO...
> 
> 253,0    3   104626   526.293450729   974  C  WS 79344288 + 8 [0]
> 253,0    3   104627   526.293455078   974  C  WS 79344296 + 8 [0]
> 253,0    1    36469   444.513863133  1068  Q  WS 154998480 + 8 [xfssyncd]
> 253,0    1    36470   444.513863135  1068  Q  WS 154998488 + 8 [xfssyncd]
                                                ^^
Apparently we are doing synchronous writes. That would explain why
it is slow. We shouldn't be doing synchronous writes here. I'll see if
I can reproduce this.

<goes off and looks>

Yes, I can reproduce the sync writes coming out of xfssyncd. I'll
look into this further and send a patch when I have something concrete.

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-06 23:31                                               ` David Chinner
@ 2007-11-07  2:13                                                 ` David Chinner
  2007-11-07  7:15                                                   ` Torsten Kaiser
  0 siblings, 1 reply; 61+ messages in thread
From: David Chinner @ 2007-11-07  2:13 UTC (permalink / raw)
  To: David Chinner
  Cc: Torsten Kaiser, Fengguang Wu, Peter Zijlstra, Maxim Levitsky,
	linux-kernel, Andrew Morton, linux-fsdevel

On Wed, Nov 07, 2007 at 10:31:14AM +1100, David Chinner wrote:
> On Tue, Nov 06, 2007 at 10:53:25PM +0100, Torsten Kaiser wrote:
> > On 11/6/07, David Chinner <dgc@sgi.com> wrote:
> > > Rather than vmstat, can you use something like iostat to show how busy your
> > > disks are?  i.e. are we seeing RMW cycles in the raid5 or some such issue.
> > 
> > Both "vmstat 10" and "iostat -x 10" output from this test:
> > procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
> >  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
> >  2  0      0 3700592      0  85424    0    0    31    83  108  244  2  1 95  1
> > -> emerge reads something, don't know for sure what...
> >  1  0      0 3665352      0  87940    0    0   239     2  343  585  2  1 97  0
> ....
> > 
> > The last 20% of the btrace look more or less completely like this, no
> > other programs do any IO...
> > 
> > 253,0    3   104626   526.293450729   974  C  WS 79344288 + 8 [0]
> > 253,0    3   104627   526.293455078   974  C  WS 79344296 + 8 [0]
> > 253,0    1    36469   444.513863133  1068  Q  WS 154998480 + 8 [xfssyncd]
> > 253,0    1    36470   444.513863135  1068  Q  WS 154998488 + 8 [xfssyncd]
>                                                 ^^
> Apparently we are doing synchronous writes. That would explain why
> it is slow. We shouldn't be doing synchronous writes here. I'll see if
> I can reproduce this.
> 
> <goes off and looks>
> 
> Yes, I can reproduce the sync writes coming out of xfssyncd. I'll
> look into this further and send a patch when I have something concrete.

Ok, so it's not synchronous writes that we are doing - we're just
submitting bio's tagged as WRITE_SYNC to get the I/O issued quickly.
The "synchronous" nature appears to be coming from higher level
locking when reclaiming inodes (on the flush lock). It appears that
inode write clustering is failing completely so we are writing the
same block multiple times i.e. once for each inode in the cluster we
have to write.

This must be a side effect of some other change as we haven't
changed anything in the reclaim code recently.....

/me scurries off to run some tests 

Indeed it is. The patch below should fix the problem - the inode
clusters weren't getting set up properly when inodes were being
read in or allocated. This is a regression, introduced by this
mod:

http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=da353b0d64e070ae7c5342a0d56ec20ae9ef5cfb

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

---
 fs/xfs/xfs_iget.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: 2.6.x-xfs-new/fs/xfs/xfs_iget.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_iget.c	2007-11-02 13:44:46.000000000 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfs_iget.c	2007-11-07 13:08:42.534440675 +1100
@@ -248,7 +248,7 @@ finish_inode:
 	icl = NULL;
 	if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq,
 							first_index, 1)) {
-		if ((iq->i_ino & mask) == first_index)
+		if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index)
 			icl = iq->i_cluster;
 	}
 

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-07  2:13                                                 ` David Chinner
@ 2007-11-07  7:15                                                   ` Torsten Kaiser
  2007-11-08  0:38                                                     ` David Chinner
  0 siblings, 1 reply; 61+ messages in thread
From: Torsten Kaiser @ 2007-11-07  7:15 UTC (permalink / raw)
  To: David Chinner
  Cc: Fengguang Wu, Peter Zijlstra, Maxim Levitsky, linux-kernel,
	Andrew Morton, linux-fsdevel

On 11/7/07, David Chinner <dgc@sgi.com> wrote:
> Ok, so it's not synchronous writes that we are doing - we're just
> submitting bio's tagged as WRITE_SYNC to get the I/O issued quickly.
> The "synchronous" nature appears to be coming from higher level
> locking when reclaiming inodes (on the flush lock). It appears that
> inode write clustering is failing completely so we are writing the
> same block multiple times i.e. once for each inode in the cluster we
> have to write.

Works for me. The only remaining stalls are sub second and look
completely valid, considering the amount of files being removed.

vmstat 10 from this test:
 3  0      0 3500192    332 204956    0    0   105  8512 1809 6473  6 10 83  1
 0  0      0 3500200    332 204576    0    0     0  4367 1355 3712  2  6 92  0
 2  0      0 3504264    332 203528    0    0     0  6805 1912 4967  4  8 88  0
 0  0      0 3511632    332 203528    0    0     0  2843  805 1791  2  4 94  0
 0  0      0 3516852    332 203516    0    0     0  3375  879 2712  3  5 93  0
 0  0      0 3530544    332 202668    0    0   186   776  488 1152  4  2 89  4
 0  0      0 3574788    332 204960    0    0   226   326  358  787  0  1 98  0
 0  0      0 3576820    332 204960    0    0     0   376  332  737  0  0 99  0
 0  0      0 3578432    332 204960    0    0     0   356  293  606  1  1 99  0
 0  0      0 3580192    332 204960    0    0     0   101  104  384  0  0 99  0

I'm pleased to note that this is now much faster again.
Thanks!

Tested-by: Torsten Kaiser <just.for.lkml@googlemail.com>

CC's please note: It looks like this was really a different problem
than the 100% iowait that was seen with reiserfs.
Also the one complete stall I have seen is probably something else.
But I have not been able to reproduce this again with -mm and have
never seen this on mainline, so I will just ignore that single event
until I see it again.

Torsten

> ---
>  fs/xfs/xfs_iget.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> Index: 2.6.x-xfs-new/fs/xfs/xfs_iget.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_iget.c        2007-11-02 13:44:46.000000000 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfs_iget.c     2007-11-07 13:08:42.534440675 +1100
> @@ -248,7 +248,7 @@ finish_inode:
>         icl = NULL;
>         if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq,
>                                                         first_index, 1)) {
> -               if ((iq->i_ino & mask) == first_index)
> +               if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index)
>                         icl = iq->i_cluster;
>         }
>
>

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-07  7:15                                                   ` Torsten Kaiser
@ 2007-11-08  0:38                                                     ` David Chinner
  2007-11-20 13:16                                                       ` Damien Wyart
  0 siblings, 1 reply; 61+ messages in thread
From: David Chinner @ 2007-11-08  0:38 UTC (permalink / raw)
  To: Torsten Kaiser
  Cc: David Chinner, Fengguang Wu, Peter Zijlstra, Maxim Levitsky,
	linux-kernel, Andrew Morton, linux-fsdevel

On Wed, Nov 07, 2007 at 08:15:06AM +0100, Torsten Kaiser wrote:
> On 11/7/07, David Chinner <dgc@sgi.com> wrote:
> > Ok, so it's not synchronous writes that we are doing - we're just
> > submitting bio's tagged as WRITE_SYNC to get the I/O issued quickly.
> > The "synchronous" nature appears to be coming from higher level
> > locking when reclaiming inodes (on the flush lock). It appears that
> > inode write clustering is failing completely so we are writing the
> > same block multiple times i.e. once for each inode in the cluster we
> > have to write.
> 
> Works for me. The only remaining stalls are sub second and look
> completely valid, considering the amount of files being removed.
....
> Tested-by: Torsten Kaiser <just.for.lkml@googlemail.com>

Great - thanks for reporting the problem and testing the fix.

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: Patch tags [was writeout stalls in current -git] 
  2007-11-06 23:26                                               ` Adrian Bunk
@ 2007-11-09 16:10                                                 ` Jonathan Corbet
  2007-11-09 16:19                                                   ` Adrian Bunk
  0 siblings, 1 reply; 61+ messages in thread
From: Jonathan Corbet @ 2007-11-09 16:10 UTC (permalink / raw)
  To: Adrian Bunk; +Cc: Andrew Morton, linux-kernel

Adrian Bunk <bunk@kernel.org> wrote:

> What's missing is a definition which of them are formal tags that must 
> be explicitly given (look at point 13 in SubmittingPatches).
> 
> Signed-off-by: and Reviewed-by: are the formal tags someone must have 
> explicitly given and that correspond to some statement.
> 
> OTOH, I can translate a "sounds fine" or "works for me" someone else 
> gave me into an Acked-by: resp. Tested-by: tag.

The discussion of the Cc: tag says:

	This is the only tag which might be added without an explicit
	action by the person it names.

I think that addresses your comment, no?  Certainly I wouldn't feel that
I could add any of the other tags to a patch I posted - that's the job
of the person named there.  

jon

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: Patch tags [was writeout stalls in current -git]
  2007-11-09 16:10                                                 ` Jonathan Corbet
@ 2007-11-09 16:19                                                   ` Adrian Bunk
  0 siblings, 0 replies; 61+ messages in thread
From: Adrian Bunk @ 2007-11-09 16:19 UTC (permalink / raw)
  To: Jonathan Corbet; +Cc: Andrew Morton, linux-kernel

On Fri, Nov 09, 2007 at 09:10:47AM -0700, Jonathan Corbet wrote:
> Adrian Bunk <bunk@kernel.org> wrote:
> 
> > What's missing is a definition which of them are formal tags that must 
> > be explicitly given (look at point 13 in SubmittingPatches).
> > 
> > Signed-off-by: and Reviewed-by: are the formal tags someone must have 
> > explicitly given and that correspond to some statement.
> > 
> > OTOH, I can translate a "sounds fine" or "works for me" someone else 
> > gave me into an Acked-by: resp. Tested-by: tag.
> 
> The discussion of the Cc: tag says:
> 
> 	This is the only tag which might be added without an explicit
> 	action by the person it names.
> 
> I think that addresses your comment, no?  Certainly I wouldn't feel that
> I could add any of the other tags to a patch I posted - that's the job
> of the person named there.  

Acked-by: and Tested-by: do require explicit actions by the person they 
name, but they are not required to explicitly give this tag.

If a user said "the patch works for me" I would consider it overly 
bureaucratic to ask the user for a formal tag.

> jon

cu
Adrian

-- 

       "Is there not promise of rain?" Ling Tan asked suddenly out
        of the darkness. There had been need of rain for many days.
       "Only a promise," Lao Er said.
                                       Pearl S. Buck - Dragon Seed


^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-08  0:38                                                     ` David Chinner
@ 2007-11-20 13:16                                                       ` Damien Wyart
  2007-11-20 21:09                                                         ` David Chinner
  0 siblings, 1 reply; 61+ messages in thread
From: Damien Wyart @ 2007-11-20 13:16 UTC (permalink / raw)
  To: David Chinner
  Cc: Torsten Kaiser, Fengguang Wu, Peter Zijlstra, Maxim Levitsky,
	linux-kernel, Andrew Morton, linux-fsdevel

Hello,

> > > Ok, so it's not synchronous writes that we are doing - we're just
> > > submitting bio's tagged as WRITE_SYNC to get the I/O issued
> > > quickly. The "synchronous" nature appears to be coming from higher
> > > level locking when reclaiming inodes (on the flush lock). It
> > > appears that inode write clustering is failing completely so we
> > > are writing the same block multiple times i.e. once for each inode
> > > in the cluster we have to write.

> > Works for me. The only remaining stalls are sub second and look
> > completely valid, considering the amount of files being removed.
> ....
> > Tested-by: Torsten Kaiser <just.for.lkml@googlemail.com>

* David Chinner <dgc@sgi.com> [2007-11-08 11:38]:
> Great - thanks for reporting the problem and testing the fix.

This patch has not yet made its way into 2.6.24 (rc3). Is it intended?
Maybe the fix can wait for 2.6.25, but wanted to make sure...

-- 
Damien Wyart

^ permalink raw reply	[flat|nested] 61+ messages in thread

* Re: writeout stalls in current -git
  2007-11-20 13:16                                                       ` Damien Wyart
@ 2007-11-20 21:09                                                         ` David Chinner
  0 siblings, 0 replies; 61+ messages in thread
From: David Chinner @ 2007-11-20 21:09 UTC (permalink / raw)
  To: Damien Wyart
  Cc: David Chinner, Torsten Kaiser, Fengguang Wu, Peter Zijlstra,
	Maxim Levitsky, linux-kernel, Andrew Morton, linux-fsdevel

On Tue, Nov 20, 2007 at 02:16:17PM +0100, Damien Wyart wrote:
> Hello,
> 
> > > > Ok, so it's not synchronous writes that we are doing - we're just
> > > > submitting bio's tagged as WRITE_SYNC to get the I/O issued quickly.
> > > > The "synchronous" nature appears to be coming from higher level
> > > > locking when reclaiming inodes (on the flush lock). It appears that
> > > > inode write clustering is failing completely so we are writing the
> > > > same block multiple times i.e. once for each inode in the cluster we
> > > > have to write.
> 
> > > Works for me. The only remaining stalls are sub second and look
> > > completely valid, considering the amount of files being removed.
> > ....
> > > Tested-by: Torsten Kaiser <just.for.lkml@googlemail.com>
> 
> * David Chinner <dgc@sgi.com> [2007-11-08 11:38]:
> > Great - thanks for reporting the problem and testing the fix.
> 
> This patch has not yet made its way into 2.6.24 (rc3). Is it intended?
> Maybe the fix can wait for 2.6.25, but wanted to make sure...

The patch is in the XFS dev tree being QA'd, and we will push it
to 2.6.24-rcX in the next few days.

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 61+ messages in thread

end of thread, other threads:[~2007-11-20 21:10 UTC | newest]

Thread overview: 61+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-10-22  6:22 100% iowait on one of cpus in current -git Maxim Levitsky
2007-10-22  9:11 ` Paolo Ornati
2007-10-22  9:43   ` Maxim Levitsky
2007-10-22  9:41 ` Peter Zijlstra
2007-10-22  9:59   ` Maxim Levitsky
2007-10-22 10:22     ` Peter Zijlstra
2007-10-22 10:40       ` Maxim Levitsky
     [not found]         ` <393050530.03287@ustc.edu.cn>
2007-10-22 10:55           ` Fengguang Wu
2007-10-22 10:58           ` Maxim Levitsky
     [not found]             ` <393051953.24752@ustc.edu.cn>
2007-10-22 11:19               ` Fengguang Wu
2007-10-22 12:21               ` Maxim Levitsky
     [not found]                 ` <393056632.00561@ustc.edu.cn>
2007-10-22 12:37                   ` Fengguang Wu
2007-10-22 13:05                   ` Maxim Levitsky
     [not found]                     ` <393058650.02921@ustc.edu.cn>
2007-10-22 13:10                       ` Fengguang Wu
     [not found]                         ` <393060478.03650@ustc.edu.cn>
2007-10-22 13:41                           ` Fengguang Wu
2007-10-31 15:22                           ` Torsten Kaiser
     [not found]                             ` <393903856.06449@ustc.edu.cn>
2007-11-01  7:57                               ` Fengguang Wu
2007-11-01 18:20                               ` Torsten Kaiser
2007-11-01 19:00                                 ` Torsten Kaiser
     [not found]                                   ` <393970108.15915@ustc.edu.cn>
2007-11-02  2:21                                     ` writeout stalls " Fengguang Wu
2007-11-02 10:15                                       ` Peter Zijlstra
2007-11-02 19:22                                         ` Torsten Kaiser
2007-11-02 20:43                                           ` David Chinner
2007-11-02 21:02                                             ` Torsten Kaiser
2007-11-04 11:19                                             ` Torsten Kaiser
2007-11-05  1:45                                               ` David Chinner
2007-11-05  7:01                                                 ` Torsten Kaiser
2007-11-05 18:27                                                 ` Torsten Kaiser
2007-11-06  4:25                                                   ` David Chinner
2007-11-06  7:10                                                     ` Torsten Kaiser
2007-11-06 19:01                                                     ` Peter Zijlstra
2007-11-06 20:26                                                       ` Torsten Kaiser
     [not found]                                           ` <394340668.31055@ustc.edu.cn>
2007-11-06  9:17                                             ` Fengguang Wu
2007-11-06 21:53                                             ` Torsten Kaiser
2007-11-06 23:31                                               ` David Chinner
2007-11-07  2:13                                                 ` David Chinner
2007-11-07  7:15                                                   ` Torsten Kaiser
2007-11-08  0:38                                                     ` David Chinner
2007-11-20 13:16                                                       ` Damien Wyart
2007-11-20 21:09                                                         ` David Chinner
     [not found]                                         ` <393999615.15343@ustc.edu.cn>
2007-11-02 10:33                                           ` Fengguang Wu
2007-11-05 23:57                                           ` Andrew Morton
2007-11-06 10:20                                             ` Peter Zijlstra
2007-11-06 16:25                                             ` Patch tags [was writeout stalls in current -git] Jonathan Corbet
2007-11-06 17:03                                               ` Balbir Singh
2007-11-06 23:26                                               ` Adrian Bunk
2007-11-09 16:10                                                 ` Jonathan Corbet
2007-11-09 16:19                                                   ` Adrian Bunk
2007-11-02  7:50                                     ` writeout stalls in current -git Torsten Kaiser
     [not found]                                 ` <393968464.13148@ustc.edu.cn>
2007-11-02  1:54                                   ` Fengguang Wu
2007-11-02  7:42                                   ` Torsten Kaiser
     [not found]                                     ` <393989953.22199@ustc.edu.cn>
2007-11-02  7:52                                       ` Fengguang Wu
2007-11-02 17:47                                       ` Torsten Kaiser
2007-10-22 13:28                       ` 100% iowait on one of cpus " Maxim Levitsky
     [not found]                 ` <393126119.26275@ustc.edu.cn>
2007-10-23  7:55                   ` [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file Fengguang Wu
2007-10-23 10:07                   ` Peter Zijlstra
     [not found]                     ` <393140585.27414@ustc.edu.cn>
2007-10-23 11:56                       ` Fengguang Wu
2007-10-23 14:10                       ` Chris Mason
     [not found]                         ` <393150419.31806@ustc.edu.cn>
2007-10-23 14:40                           ` Fengguang Wu
2007-10-23 10:17                   ` Maxim Levitsky
     [not found]                     ` <393150504.32739@ustc.edu.cn>
2007-10-23 14:41                       ` Fengguang Wu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).