From cf8d53810877b6de0514c37f3b5846c4019c6fd1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Apr 2007 16:59:10 -0700 Subject: [PATCH] more patches added to 2.6.20 queue --- ...-by-incorrect-request_sense-handling.patch | 47 ++++ ...x-bogon-in-dev-mem-mmap-ing-on-nommu.patch | 34 +++ ...-processes-wrongly-thought-mpol_bind.patch | 38 +++ ...pointer-access-in-8250-serial-driver.patch | 100 ++++++++ ...gration-fix-nr_file_pages-accounting.patch | 55 ++++ ...-fix-xattr-root-locking-refcount-bug.patch | 173 +++++++++++++ queue-2.6.20/series | 7 + ...he-structure-members-alignment-issue.patch | 241 ++++++++++++++++++ 8 files changed, 695 insertions(+) create mode 100644 queue-2.6.20/3w-xxxx-fix-oops-caused-by-incorrect-request_sense-handling.patch create mode 100644 queue-2.6.20/fix-bogon-in-dev-mem-mmap-ing-on-nommu.patch create mode 100644 queue-2.6.20/fix-oom-killing-processes-wrongly-thought-mpol_bind.patch create mode 100644 queue-2.6.20/fix-possible-null-pointer-access-in-8250-serial-driver.patch create mode 100644 queue-2.6.20/page-migration-fix-nr_file_pages-accounting.patch create mode 100644 queue-2.6.20/reiserfs-fix-xattr-root-locking-refcount-bug.patch create mode 100644 queue-2.6.20/taskstats-fix-the-structure-members-alignment-issue.patch diff --git a/queue-2.6.20/3w-xxxx-fix-oops-caused-by-incorrect-request_sense-handling.patch b/queue-2.6.20/3w-xxxx-fix-oops-caused-by-incorrect-request_sense-handling.patch new file mode 100644 index 00000000000..23bb62ae0fc --- /dev/null +++ b/queue-2.6.20/3w-xxxx-fix-oops-caused-by-incorrect-request_sense-handling.patch @@ -0,0 +1,47 @@ +From 6e3b2bbb197eb12b2bef35bcf2ac3bd6a5facab2 Mon Sep 17 00:00:00 2001 +From: James Bottomley +Date: Fri, 6 Apr 2007 11:14:56 -0500 +Subject: [SCSI] 3w-xxxx: fix oops caused by incorrect REQUEST_SENSE handling + + +From: James Bottomley + +3w-xxxx emulates a REQUEST_SENSE response by simply returning nothing. 
+Unfortunately, it's assuming that the REQUEST_SENSE command is +implemented with use_sg == 0, which is no longer the case. The oops +occurs because it's clearing the scatterlist in request_buffer instead +of the memory region. + +This is fixed by using tw_transfer_internal() to transfer correctly to +the scatterlist. + +Acked-by: adam radford +Signed-off-by: James Bottomley +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/3w-xxxx.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/3w-xxxx.c ++++ b/drivers/scsi/3w-xxxx.c +@@ -1864,10 +1864,17 @@ static int tw_scsiop_read_write(TW_Devic + /* This function will handle the request sense scsi command */ + static int tw_scsiop_request_sense(TW_Device_Extension *tw_dev, int request_id) + { ++ char request_buffer[18]; ++ + dprintk(KERN_NOTICE "3w-xxxx: tw_scsiop_request_sense()\n"); + +- /* For now we just zero the request buffer */ +- memset(tw_dev->srb[request_id]->request_buffer, 0, tw_dev->srb[request_id]->request_bufflen); ++ memset(request_buffer, 0, sizeof(request_buffer)); ++ request_buffer[0] = 0x70; /* Immediate fixed format */ ++ request_buffer[7] = 10; /* minimum size per SPC: 18 bytes */ ++ /* leave all other fields zero, giving effectively NO_SENSE return */ ++ tw_transfer_internal(tw_dev, request_id, request_buffer, ++ sizeof(request_buffer)); ++ + tw_dev->state[request_id] = TW_S_COMPLETED; + tw_state_request_finish(tw_dev, request_id); + diff --git a/queue-2.6.20/fix-bogon-in-dev-mem-mmap-ing-on-nommu.patch b/queue-2.6.20/fix-bogon-in-dev-mem-mmap-ing-on-nommu.patch new file mode 100644 index 00000000000..9ba1929c672 --- /dev/null +++ b/queue-2.6.20/fix-bogon-in-dev-mem-mmap-ing-on-nommu.patch @@ -0,0 +1,34 @@ +From stable-bounces@linux.kernel.org Mon Apr 16 22:54:33 2007 +From: Benjamin Herrenschmidt +Date: Mon, 16 Apr 2007 22:53:16 -0700 +Subject: fix bogon in /dev/mem mmap'ing on nommu +To: torvalds@linux-foundation.org +Cc: dhowells@redhat.com, 
benh@kernel.crashing.org, akpm@linux-foundation.org, stable@kernel.org +Message-ID: <200704170553.l3H5rGZs021576@shell0.pdx.osdl.net> + +From: Benjamin Herrenschmidt + +While digging through my MAP_FIXED changes, I found that rather obvious +bug in /dev/mem mmap implementation for nommu archs. get_unmapped_area() +is expected to return an address, not a pfn. + +Signed-off-by: Benjamin Herrenschmidt +Acked-By: David Howells +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/char/mem.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/char/mem.c ++++ b/drivers/char/mem.c +@@ -248,7 +248,7 @@ static unsigned long get_unmapped_area_m + { + if (!valid_mmap_phys_addr_range(pgoff, len)) + return (unsigned long) -EINVAL; +- return pgoff; ++ return pgoff << PAGE_SHIFT; + } + + /* can't do an in-place private mapping if there's no MMU */ diff --git a/queue-2.6.20/fix-oom-killing-processes-wrongly-thought-mpol_bind.patch b/queue-2.6.20/fix-oom-killing-processes-wrongly-thought-mpol_bind.patch new file mode 100644 index 00000000000..36ff6e8f985 --- /dev/null +++ b/queue-2.6.20/fix-oom-killing-processes-wrongly-thought-mpol_bind.patch @@ -0,0 +1,38 @@ +From stable-bounces@linux.kernel.org Mon Apr 23 14:45:11 2007 +From: Hugh Dickins +Date: Mon, 23 Apr 2007 14:41:02 -0700 +Subject: fix OOM killing processes wrongly thought MPOL_BIND +To: torvalds@linux-foundation.org +Cc: bill.irwin@oracle.com, hugh@veritas.com, akpm@linux-foundation.org, stable@kernel.org, kamezawa.hiroyu@jp.fujitsu.com, clameter@sgi.com +Message-ID: <200704232143.l3NLheqv025693@shell0.pdx.osdl.net> + + +From: Hugh Dickins + +I only have CONFIG_NUMA=y for build testing: surprised when trying a memhog +to see lots of other processes killed with "No available memory +(MPOL_BIND)". memhog is killed correctly once we initialize nodemask in +constrained_alloc(). 
+ +Signed-off-by: Hugh Dickins +Acked-by: Christoph Lameter +Acked-by: William Irwin +Acked-by: KAMEZAWA Hiroyuki +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman + +--- + mm/oom_kill.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/mm/oom_kill.c ++++ b/mm/oom_kill.c +@@ -176,6 +176,8 @@ static inline int constrained_alloc(stru + struct zone **z; + nodemask_t nodes; + int node; ++ ++ nodes_clear(nodes); + /* node has memory ? */ + for_each_online_node(node) + if (NODE_DATA(node)->node_present_pages) diff --git a/queue-2.6.20/fix-possible-null-pointer-access-in-8250-serial-driver.patch b/queue-2.6.20/fix-possible-null-pointer-access-in-8250-serial-driver.patch new file mode 100644 index 00000000000..fb4de5052f1 --- /dev/null +++ b/queue-2.6.20/fix-possible-null-pointer-access-in-8250-serial-driver.patch @@ -0,0 +1,100 @@ +From stable-bounces@linux.kernel.org Mon Apr 23 14:45:17 2007 +From: Taku Izumi +Date: Mon, 23 Apr 2007 14:41:00 -0700 +Subject: Fix possible NULL pointer access in 8250 serial driver +To: torvalds@linux-foundation.org +Cc: akpm@linux-foundation.org, izumi2005@soft.fujitsu.com, stable@kernel.org, rmk@arm.linux.org.uk, kaneshige.kenji@jp.fujitsu.com +Message-ID: <200704232143.l3NLhdSV025689@shell0.pdx.osdl.net> + + +From: Taku Izumi + +I encountered the following kernel panic. The cause of this problem was +NULL pointer access in check_modem_status() in 8250.c. I confirmed this +problem is fixed by the attached patch, but I don't know this is the +correct fix. 
+ +sadc[4378]: NaT consumption 2216203124768 [1] +Modules linked in: binfmt_misc dm_mirror dm_mod thermal processor fan +container button sg e100 eepro100 mii ehci_hcd ohci_hcd + +Pid: 4378, CPU 0, comm: sadc +psr : 00001210085a2010 ifs : 8000000000000289 ip : [] +Not tainted +ip is at check_modem_status+0xf1/0x360 +unat: 0000000000000000 pfs : 0000000000000289 rsc : 0000000000000003 +rnat: 800000000000cc18 bsps: 0000000000000000 pr : 0000000000aa6a99 +ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c8a70033f +csd : 0000000000000000 ssd : 0000000000000000 +b0 : a000000100481fb0 b6 : a0000001004822e0 b7 : a000000100477f20 +f6 : 1003e2222222222222222 f7 : 0ffdba200000000000000 +f8 : 100018000000000000000 f9 : 10002a000000000000000 +f10 : 0fffdccccccccc8c00000 f11 : 1003e0000000000000000 +r1 : a000000100b9af40 r2 : 0000000000000008 r3 : a000000100ad4e21 +r8 : 00000000000000bb r9 : 0000000000000001 r10 : 0000000000000000 +r11 : a000000100ad4d58 r12 : e0000000037b7df0 r13 : e0000000037b0000 +r14 : 0000000000000001 r15 : 0000000000000018 r16 : a000000100ad4d6c +r17 : 0000000000000000 r18 : 0000000000000000 r19 : 0000000000000000 +r20 : a00000010099bc88 r21 : 00000000000000bb r22 : 00000000000000bb +r23 : c003fffffc0ff3fe r24 : c003fffffc000000 r25 : 00000000000ff3fe +r26 : a0000001009b7ad0 r27 : 0000000000000001 r28 : a0000001009b7ad8 +r29 : 0000000000000000 r30 : a0000001009b7ad0 r31 : a0000001009b7ad0 + +Call Trace: +[] show_stack+0x40/0xa0 +sp=e0000000037b7810 bsp=e0000000037b1118 +[] show_regs+0x840/0x880 +sp=e0000000037b79e0 bsp=e0000000037b10c0 +[] die+0x1c0/0x2c0 +sp=e0000000037b79e0 bsp=e0000000037b1078 +[] die_if_kernel+0x50/0x80 +sp=e0000000037b7a00 bsp=e0000000037b1048 +[] ia64_fault+0x11e0/0x1300 +sp=e0000000037b7a00 bsp=e0000000037b0fe8 +[] ia64_leave_kernel+0x0/0x280 +sp=e0000000037b7c20 bsp=e0000000037b0fe8 +[] check_modem_status+0xf0/0x360 +sp=e0000000037b7df0 bsp=e0000000037b0fa0 +[] serial8250_get_mctrl+0x20/0xa0 +sp=e0000000037b7df0 
bsp=e0000000037b0f80 +[] uart_read_proc+0x250/0x860 +sp=e0000000037b7df0 bsp=e0000000037b0ee0 +[] proc_file_read+0x1d0/0x4c0 +sp=e0000000037b7e10 bsp=e0000000037b0e80 +[] vfs_read+0x1b0/0x300 +sp=e0000000037b7e20 bsp=e0000000037b0e30 +[] sys_read+0x70/0xe0 +sp=e0000000037b7e20 bsp=e0000000037b0db0 +[] ia64_ret_from_syscall+0x0/0x20 +sp=e0000000037b7e30 bsp=e0000000037b0db0 +[] __kernel_syscall_via_break+0x0/0x20 +sp=e0000000037b8000 bsp=e0000000037b0db0 + + +Fix the possible NULL pointer access in check_modem_status() in 8250.c. The +check_modem_status() would access 'info' member of uart_port structure, but it +is not initialized before uart_open() is called. The check_modem_status() can +be called through /proc/tty/driver/serial before uart_open() is called. + +Signed-off-by: Kenji Kaneshige +Signed-off-by: Taku Izumi +Cc: Russell King +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/serial/8250.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/serial/8250.c ++++ b/drivers/serial/8250.c +@@ -1289,7 +1289,8 @@ static unsigned int check_modem_status(s + { + unsigned int status = serial_in(up, UART_MSR); + +- if (status & UART_MSR_ANY_DELTA && up->ier & UART_IER_MSI) { ++ if (status & UART_MSR_ANY_DELTA && up->ier & UART_IER_MSI && ++ up->port.info != NULL) { + if (status & UART_MSR_TERI) + up->port.icount.rng++; + if (status & UART_MSR_DDSR) diff --git a/queue-2.6.20/page-migration-fix-nr_file_pages-accounting.patch b/queue-2.6.20/page-migration-fix-nr_file_pages-accounting.patch new file mode 100644 index 00000000000..07ae5125532 --- /dev/null +++ b/queue-2.6.20/page-migration-fix-nr_file_pages-accounting.patch @@ -0,0 +1,55 @@ +From stable-bounces@linux.kernel.org Mon Apr 23 14:45:22 2007 +From: Christoph Lameter +Date: Mon, 23 Apr 2007 14:41:09 -0700 +Subject: page migration: fix NR_FILE_PAGES accounting +To: torvalds@linux-foundation.org +Cc: solo@google.com, akpm@linux-foundation.org, 
mbligh@mbligh.org, stable@kernel.org, clameter@sgi.com +Message-ID: <200704232143.l3NLhlfI025729@shell0.pdx.osdl.net> + +From: Christoph Lameter + +NR_FILE_PAGES must be accounted for depending on the zone that the page +belongs to. If we replace the page in the radix tree then we may have to +shift the count to another zone. + +Suggested-by: Ethan Solomita +Cc: Martin Bligh +Signed-off-by: Christoph Lameter +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman + +--- + mm/migrate.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -297,7 +297,7 @@ static int migrate_page_move_mapping(str + void **pslot; + + if (!mapping) { +- /* Anonymous page */ ++ /* Anonymous page without mapping */ + if (page_count(page) != 1) + return -EAGAIN; + return 0; +@@ -333,6 +333,19 @@ static int migrate_page_move_mapping(str + */ + __put_page(page); + ++ /* ++ * If moved to a different zone then also account ++ * the page for that zone. Other VM counters will be ++ * taken care of when we establish references to the ++ * new page and drop references to the old page. ++ * ++ * Note that anonymous pages are accounted for ++ * via NR_FILE_PAGES and NR_ANON_PAGES if they ++ * are mapped to swap space. 
++ */ ++ __dec_zone_page_state(page, NR_FILE_PAGES); ++ __inc_zone_page_state(newpage, NR_FILE_PAGES); ++ + write_unlock_irq(&mapping->tree_lock); + + return 0; diff --git a/queue-2.6.20/reiserfs-fix-xattr-root-locking-refcount-bug.patch b/queue-2.6.20/reiserfs-fix-xattr-root-locking-refcount-bug.patch new file mode 100644 index 00000000000..4da6f0b10f8 --- /dev/null +++ b/queue-2.6.20/reiserfs-fix-xattr-root-locking-refcount-bug.patch @@ -0,0 +1,173 @@ +From stable-bounces@linux.kernel.org Mon Apr 23 14:45:38 2007 +From: Jeff Mahoney +Date: Mon, 23 Apr 2007 14:41:17 -0700 +Subject: reiserfs: fix xattr root locking/refcount bug +To: torvalds@linux-foundation.org +Cc: a.righi@cineca.it, vs@namesys.com, jeffm@suse.com, zam@namesys.com, edward@namesys.com, akpm@linux-foundation.org, stable@kernel.org +Message-ID: <200704232143.l3NLhtZG025757@shell0.pdx.osdl.net> + + +From: Jeff Mahoney + +The listxattr() and getxattr() operations are only protected by a read +lock. As a result, if either of these operations run in parallel, a race +condition exists where the xattr_root will end up being cached twice, which +results in the leaking of a reference and a BUG() on umount. + +This patch refactors get_xa_root(), __get_xa_root(), and create_xa_root(), +into one get_xa_root() function that takes the appropriate locking around +the entire critical section. + +Reported, diagnosed and tested by Andrea Righi + +Signed-off-by: Jeff Mahoney +Cc: Andrea Righi +Cc: "Vladimir V. 
Saveliev" +Cc: Edward Shishkin +Cc: Alex Zarochentsev +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman + +--- + fs/reiserfs/xattr.c | 92 +++++++++++++--------------------------------------- + 1 file changed, 24 insertions(+), 68 deletions(-) + +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -54,82 +54,48 @@ + static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char + *prefix); + +-static struct dentry *create_xa_root(struct super_block *sb) ++/* Returns the dentry referring to the root of the extended attribute ++ * directory tree. If it has already been retrieved, it is used. If it ++ * hasn't been created and the flags indicate creation is allowed, we ++ * attempt to create it. On error, we return a pointer-encoded error. ++ */ ++static struct dentry *get_xa_root(struct super_block *sb, int flags) + { + struct dentry *privroot = dget(REISERFS_SB(sb)->priv_root); + struct dentry *xaroot; + + /* This needs to be created at mount-time */ + if (!privroot) +- return ERR_PTR(-EOPNOTSUPP); ++ return ERR_PTR(-ENODATA); + +- xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME)); +- if (IS_ERR(xaroot)) { ++ mutex_lock(&privroot->d_inode->i_mutex); ++ if (REISERFS_SB(sb)->xattr_root) { ++ xaroot = dget(REISERFS_SB(sb)->xattr_root); + goto out; +- } else if (!xaroot->d_inode) { +- int err; +- mutex_lock(&privroot->d_inode->i_mutex); +- err = +- privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot, +- 0700); +- mutex_unlock(&privroot->d_inode->i_mutex); +- +- if (err) { +- dput(xaroot); +- dput(privroot); +- return ERR_PTR(err); +- } +- REISERFS_SB(sb)->xattr_root = dget(xaroot); + } + +- out: +- dput(privroot); +- return xaroot; +-} +- +-/* This will return a dentry, or error, refering to the xa root directory. +- * If the xa root doesn't exist yet, the dentry will be returned without +- * an associated inode. This dentry can be used with ->mkdir to create +- * the xa directory. 
*/ +-static struct dentry *__get_xa_root(struct super_block *s) +-{ +- struct dentry *privroot = dget(REISERFS_SB(s)->priv_root); +- struct dentry *xaroot = NULL; +- +- if (IS_ERR(privroot) || !privroot) +- return privroot; +- + xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME)); + if (IS_ERR(xaroot)) { + goto out; + } else if (!xaroot->d_inode) { +- dput(xaroot); +- xaroot = NULL; +- goto out; ++ int err = -ENODATA; ++ if (flags == 0 || flags & XATTR_CREATE) ++ err = privroot->d_inode->i_op->mkdir(privroot->d_inode, ++ xaroot, 0700); ++ if (err) { ++ dput(xaroot); ++ xaroot = ERR_PTR(err); ++ goto out; ++ } + } +- +- REISERFS_SB(s)->xattr_root = dget(xaroot); ++ REISERFS_SB(sb)->xattr_root = dget(xaroot); + + out: ++ mutex_unlock(&privroot->d_inode->i_mutex); + dput(privroot); + return xaroot; + } + +-/* Returns the dentry (or NULL) referring to the root of the extended +- * attribute directory tree. If it has already been retrieved, it is used. +- * Otherwise, we attempt to retrieve it from disk. It may also return +- * a pointer-encoded error. +- */ +-static inline struct dentry *get_xa_root(struct super_block *s) +-{ +- struct dentry *dentry = dget(REISERFS_SB(s)->xattr_root); +- +- if (!dentry) +- dentry = __get_xa_root(s); +- +- return dentry; +-} +- + /* Opens the directory corresponding to the inode's extended attribute store. + * If flags allow, the tree to the directory may be created. If creation is + * prohibited, -ENODATA is returned. 
*/ +@@ -138,21 +104,11 @@ static struct dentry *open_xa_dir(const + struct dentry *xaroot, *xadir; + char namebuf[17]; + +- xaroot = get_xa_root(inode->i_sb); +- if (IS_ERR(xaroot)) { ++ xaroot = get_xa_root(inode->i_sb, flags); ++ if (IS_ERR(xaroot)) + return xaroot; +- } else if (!xaroot) { +- if (flags == 0 || flags & XATTR_CREATE) { +- xaroot = create_xa_root(inode->i_sb); +- if (IS_ERR(xaroot)) +- return xaroot; +- } +- if (!xaroot) +- return ERR_PTR(-ENODATA); +- } + + /* ok, we have xaroot open */ +- + snprintf(namebuf, sizeof(namebuf), "%X.%X", + le32_to_cpu(INODE_PKEY(inode)->k_objectid), + inode->i_generation); +@@ -821,7 +777,7 @@ int reiserfs_delete_xattrs(struct inode + + /* Leftovers besides . and .. -- that's not good. */ + if (dir->d_inode->i_nlink <= 2) { +- root = get_xa_root(inode->i_sb); ++ root = get_xa_root(inode->i_sb, XATTR_REPLACE); + reiserfs_write_lock_xattrs(inode->i_sb); + err = vfs_rmdir(root->d_inode, dir); + reiserfs_write_unlock_xattrs(inode->i_sb); diff --git a/queue-2.6.20/series b/queue-2.6.20/series index e0f80c38017..73a102ce49b 100644 --- a/queue-2.6.20/series +++ b/queue-2.6.20/series @@ -19,3 +19,10 @@ exec.c-fix-coredump-to-pipe-problem-and-obscure-security-hole.patch nfs-fix-an-oops-in-nfs_setattr.patch x86-don-t-probe-for-ddc-on-vbe1.2.patch vt-fix-potential-race-in-vt_waitactive-handler.patch +3w-xxxx-fix-oops-caused-by-incorrect-request_sense-handling.patch +fix-bogon-in-dev-mem-mmap-ing-on-nommu.patch +fix-oom-killing-processes-wrongly-thought-mpol_bind.patch +fix-possible-null-pointer-access-in-8250-serial-driver.patch +page-migration-fix-nr_file_pages-accounting.patch +taskstats-fix-the-structure-members-alignment-issue.patch +reiserfs-fix-xattr-root-locking-refcount-bug.patch diff --git a/queue-2.6.20/taskstats-fix-the-structure-members-alignment-issue.patch b/queue-2.6.20/taskstats-fix-the-structure-members-alignment-issue.patch new file mode 100644 index 00000000000..9af5ec9c1d4 --- /dev/null +++ 
b/queue-2.6.20/taskstats-fix-the-structure-members-alignment-issue.patch @@ -0,0 +1,241 @@ +From stable-bounces@linux.kernel.org Mon Apr 23 14:45:22 2007 +From: Balbir Singh +Date: Mon, 23 Apr 2007 14:41:05 -0700 +Subject: Taskstats fix the structure members alignment issue +To: torvalds@linux-foundation.org +Cc: nagar@watson.ibm.com, balbir@in.ibm.com, jlan@engr.sgi.com, akpm@linux-foundation.org, stable@kernel.org, balbir@linux.vnet.ibm.com +Message-ID: <200704232143.l3NLhhCC025709@shell0.pdx.osdl.net> + + +From: Balbir Singh + +We broke the alignment of members of taskstats to the 8 byte boundary +with the CSA patches. In the current kernel, the taskstats structure is +not suitable for use by 32 bit applications in a 64 bit kernel. + +On x86_64 + +Offsets of taskstats' members (64 bit kernel, 64 bit application) + +@taskstats'offsetof[@taskstats'indices] = ( + 0, # version + 4, # ac_exitcode + 8, # ac_flag + 9, # ac_nice + 16, # cpu_count + 24, # cpu_delay_total + 32, # blkio_count + 40, # blkio_delay_total + 48, # swapin_count + 56, # swapin_delay_total + 64, # cpu_run_real_total + 72, # cpu_run_virtual_total + 80, # ac_comm + 112, # ac_sched + 113, # ac_pad + 116, # ac_uid + 120, # ac_gid + 124, # ac_pid + 128, # ac_ppid + 132, # ac_btime + 136, # ac_etime + 144, # ac_utime + 152, # ac_stime + 160, # ac_minflt + 168, # ac_majflt + 176, # coremem + 184, # virtmem + 192, # hiwater_rss + 200, # hiwater_vm + 208, # read_char + 216, # write_char + 224, # read_syscalls + 232, # write_syscalls + 240, # read_bytes + 248, # write_bytes + 256, # cancelled_write_bytes + ); + +Offsets of taskstats' members (64 bit kernel, 32 bit application) + +@taskstats'offsetof[@taskstats'indices] = ( + 0, # version + 4, # ac_exitcode + 8, # ac_flag + 9, # ac_nice + 12, # cpu_count + 20, # cpu_delay_total + 28, # blkio_count + 36, # blkio_delay_total + 44, # swapin_count + 52, # swapin_delay_total + 60, # cpu_run_real_total + 68, # cpu_run_virtual_total + 76, # ac_comm + 108, # 
ac_sched + 109, # ac_pad + 112, # ac_uid + 116, # ac_gid + 120, # ac_pid + 124, # ac_ppid + 128, # ac_btime + 132, # ac_etime + 140, # ac_utime + 148, # ac_stime + 156, # ac_minflt + 164, # ac_majflt + 172, # coremem + 180, # virtmem + 188, # hiwater_rss + 196, # hiwater_vm + 204, # read_char + 212, # write_char + 220, # read_syscalls + 228, # write_syscalls + 236, # read_bytes + 244, # write_bytes + 252, # cancelled_write_bytes + ); + +One way to solve the problem without re-arranging structure members +is to pack the structure. The patch adds an __attribute__((aligned(8))) to +the taskstats structure members so that 32 bit applications using taskstats +can work with a 64 bit kernel. + +Using __attribute__((packed)) would break the 64 bit alignment of members. + +The fix was tested on x86_64. After the fix, we got + +Offsets of taskstats' members (64 bit kernel, 64 bit application) + +@taskstats'offsetof[@taskstats'indices] = ( + 0, # version + 4, # ac_exitcode + 8, # ac_flag + 9, # ac_nice + 16, # cpu_count + 24, # cpu_delay_total + 32, # blkio_count + 40, # blkio_delay_total + 48, # swapin_count + 56, # swapin_delay_total + 64, # cpu_run_real_total + 72, # cpu_run_virtual_total + 80, # ac_comm + 112, # ac_sched + 113, # ac_pad + 120, # ac_uid + 124, # ac_gid + 128, # ac_pid + 132, # ac_ppid + 136, # ac_btime + 144, # ac_etime + 152, # ac_utime + 160, # ac_stime + 168, # ac_minflt + 176, # ac_majflt + 184, # coremem + 192, # virtmem + 200, # hiwater_rss + 208, # hiwater_vm + 216, # read_char + 224, # write_char + 232, # read_syscalls + 240, # write_syscalls + 248, # read_bytes + 256, # write_bytes + 264, # cancelled_write_bytes + ); + +Offsets of taskstats' members (64 bit kernel, 32 bit application) + +@taskstats'offsetof[@taskstats'indices] = ( + 0, # version + 4, # ac_exitcode + 8, # ac_flag + 9, # ac_nice + 16, # cpu_count + 24, # cpu_delay_total + 32, # blkio_count + 40, # blkio_delay_total + 48, # swapin_count + 56, # swapin_delay_total + 64, # 
cpu_run_real_total + 72, # cpu_run_virtual_total + 80, # ac_comm + 112, # ac_sched + 113, # ac_pad + 120, # ac_uid + 124, # ac_gid + 128, # ac_pid + 132, # ac_ppid + 136, # ac_btime + 144, # ac_etime + 152, # ac_utime + 160, # ac_stime + 168, # ac_minflt + 176, # ac_majflt + 184, # coremem + 192, # virtmem + 200, # hiwater_rss + 208, # hiwater_vm + 216, # read_char + 224, # write_char + 232, # read_syscalls + 240, # write_syscalls + 248, # read_bytes + 256, # write_bytes + 264, # cancelled_write_bytes + ); + +Signed-off-by: Balbir Singh +Cc: Jay Lan +Cc: Shailabh Nagar +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/taskstats.h | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/include/linux/taskstats.h ++++ b/include/linux/taskstats.h +@@ -31,7 +31,7 @@ + */ + + +-#define TASKSTATS_VERSION 3 ++#define TASKSTATS_VERSION 4 + #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN + * in linux/sched.h */ + +@@ -66,7 +66,7 @@ struct taskstats { + /* Delay waiting for cpu, while runnable + * count, delay_total NOT updated atomically + */ +- __u64 cpu_count; ++ __u64 cpu_count __attribute__((aligned(8))); + __u64 cpu_delay_total; + + /* Following four fields atomically updated using task->delays->lock */ +@@ -101,14 +101,17 @@ struct taskstats { + + /* Basic Accounting Fields start */ + char ac_comm[TS_COMM_LEN]; /* Command name */ +- __u8 ac_sched; /* Scheduling discipline */ ++ __u8 ac_sched __attribute__((aligned(8))); ++ /* Scheduling discipline */ + __u8 ac_pad[3]; +- __u32 ac_uid; /* User ID */ ++ __u32 ac_uid __attribute__((aligned(8))); ++ /* User ID */ + __u32 ac_gid; /* Group ID */ + __u32 ac_pid; /* Process ID */ + __u32 ac_ppid; /* Parent process ID */ + __u32 ac_btime; /* Begin time [sec since 1970] */ +- __u64 ac_etime; /* Elapsed time [usec] */ ++ __u64 ac_etime __attribute__((aligned(8))); ++ /* Elapsed time [usec] */ + __u64 ac_utime; /* User CPU time [usec] */ + __u64 ac_stime; /* SYstem 
CPU time [usec] */ + __u64 ac_minflt; /* Minor Page Fault Count */ -- 2.47.3