--- /dev/null
+From security-bounces@linux.kernel.org Tue Nov 8 07:05:26 2005
+Date: Tue, 8 Nov 2005 15:03:46 +0000 (GMT)
+From: Mark J Cox <mjc@redhat.com>
+To: security@kernel.org
+Message-ID: <0511081502560.8682@dell1.moose.awe.com>
+Cc: aviro@redhat.com, vendor-sec@lst.de
+Subject: CVE-2005-2709 sysctl unregistration oops
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+You could open a /proc/sys/net/ipv4/conf/<if>/<whatever> file, then
+wait for the interface to go away and try to grab as much memory as
+possible in the hope of hitting the (kfreed) ctl_table. Then fill it
+with pointers to your function. Then do a read from the file you've
+opened and, if you are lucky, you'll get it called as ->proc_handler()
+in kernel mode.
+
+So this is at least an Oops and possibly more. It does depend on an
+interface going away, though, so it is less of a security risk than it
+would otherwise be.
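+
+To make the race concrete, here is a rough user-space sketch of the
+sequence (the interface name is only an example, and the "grab memory"
+step is left as a comment since it depends on allocator behaviour):
+
+#include <fcntl.h>
+#include <unistd.h>
+
+int main(void)
+{
+	char buf[64];
+	/* open a per-interface sysctl file while the interface exists */
+	int fd = open("/proc/sys/net/ipv4/conf/eth1/forwarding", O_RDONLY);
+
+	if (fd < 0)
+		return 1;
+	/* wait for the interface to be removed; its ctl_table gets
+	 * kfree()d, but this open file still points at it */
+	sleep(60);
+	/* ... try to reallocate the freed memory with your own data ... */
+	/* the read below dereferences the stale ctl_table and may end up
+	 * calling whatever now sits in ->proc_handler in kernel mode */
+	read(fd, buf, sizeof(buf));
+	return 0;
+}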
+
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+diff -urN current/arch/s390/appldata/appldata_base.c sysctl/arch/s390/appldata/appldata_base.c
+--- current/arch/s390/appldata/appldata_base.c 2005-08-28 23:09:40.000000000 -0400
++++ sysctl/arch/s390/appldata/appldata_base.c 2005-11-03 08:53:57.000000000 -0500
+@@ -592,12 +592,15 @@
+ */
+ void appldata_unregister_ops(struct appldata_ops *ops)
+ {
++ void *table;
+ spin_lock(&appldata_ops_lock);
+- unregister_sysctl_table(ops->sysctl_header);
+ list_del(&ops->list);
+- kfree(ops->ctl_table);
++ /* at that point any incoming access will fail */
++ table = ops->ctl_table;
+ ops->ctl_table = NULL;
+ spin_unlock(&appldata_ops_lock);
++ unregister_sysctl_table(ops->sysctl_header);
++ kfree(table);
+ P_INFO("%s-ops unregistered!\n", ops->name);
+ }
+ /********************** module-ops management <END> **************************/
+diff -urN current/include/linux/proc_fs.h sysctl/include/linux/proc_fs.h
+--- current/include/linux/proc_fs.h 2005-08-28 23:09:48.000000000 -0400
++++ sysctl/include/linux/proc_fs.h 2005-11-03 08:43:22.000000000 -0500
+@@ -66,6 +66,7 @@
+ write_proc_t *write_proc;
+ atomic_t count; /* use count */
+ int deleted; /* delete flag */
++ void *set;
+ };
+
+ struct kcore_list {
+diff -urN current/include/linux/sysctl.h sysctl/include/linux/sysctl.h
+--- current/include/linux/sysctl.h 2005-10-28 16:42:49.000000000 -0400
++++ sysctl/include/linux/sysctl.h 2005-11-03 08:43:22.000000000 -0500
+@@ -24,6 +24,7 @@
+ #include <linux/compiler.h>
+
+ struct file;
++struct completion;
+
+ #define CTL_MAXNAME 10 /* how many path components do we allow in a
+ call to sysctl? In other words, what is
+@@ -925,6 +926,8 @@
+ {
+ ctl_table *ctl_table;
+ struct list_head ctl_entry;
++ int used;
++ struct completion *unregistering;
+ };
+
+ struct ctl_table_header * register_sysctl_table(ctl_table * table,
+diff -urN current/kernel/sysctl.c sysctl/kernel/sysctl.c
+--- current/kernel/sysctl.c 2005-10-28 16:42:49.000000000 -0400
++++ sysctl/kernel/sysctl.c 2005-11-03 08:50:17.000000000 -0500
+@@ -169,7 +169,7 @@
+
+ extern struct proc_dir_entry *proc_sys_root;
+
+-static void register_proc_table(ctl_table *, struct proc_dir_entry *);
++static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *);
+ static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
+ #endif
+
+@@ -992,10 +992,51 @@
+
+ extern void init_irq_proc (void);
+
++static DEFINE_SPINLOCK(sysctl_lock);
++
++/* called under sysctl_lock */
++static int use_table(struct ctl_table_header *p)
++{
++ if (unlikely(p->unregistering))
++ return 0;
++ p->used++;
++ return 1;
++}
++
++/* called under sysctl_lock */
++static void unuse_table(struct ctl_table_header *p)
++{
++ if (!--p->used)
++ if (unlikely(p->unregistering))
++ complete(p->unregistering);
++}
++
++/* called under sysctl_lock, will reacquire if has to wait */
++static void start_unregistering(struct ctl_table_header *p)
++{
++ /*
++ * if p->used is 0, nobody will ever touch that entry again;
++ * we'll eliminate all paths to it before dropping sysctl_lock
++ */
++ if (unlikely(p->used)) {
++ struct completion wait;
++ init_completion(&wait);
++ p->unregistering = &wait;
++ spin_unlock(&sysctl_lock);
++ wait_for_completion(&wait);
++ spin_lock(&sysctl_lock);
++ }
++ /*
++ * do not remove from the list until nobody holds it; walking the
++ * list in do_sysctl() relies on that.
++ */
++ list_del_init(&p->ctl_entry);
++}
++
+ void __init sysctl_init(void)
+ {
+ #ifdef CONFIG_PROC_FS
+- register_proc_table(root_table, proc_sys_root);
++ register_proc_table(root_table, proc_sys_root, &root_table_header);
+ init_irq_proc();
+ #endif
+ }
+@@ -1004,6 +1045,7 @@
+ void __user *newval, size_t newlen)
+ {
+ struct list_head *tmp;
++ int error = -ENOTDIR;
+
+ if (nlen <= 0 || nlen >= CTL_MAXNAME)
+ return -ENOTDIR;
+@@ -1012,20 +1054,30 @@
+ if (!oldlenp || get_user(old_len, oldlenp))
+ return -EFAULT;
+ }
++ spin_lock(&sysctl_lock);
+ tmp = &root_table_header.ctl_entry;
+ do {
+ struct ctl_table_header *head =
+ list_entry(tmp, struct ctl_table_header, ctl_entry);
+ void *context = NULL;
+- int error = parse_table(name, nlen, oldval, oldlenp,
++
++ if (!use_table(head))
++ continue;
++
++ spin_unlock(&sysctl_lock);
++
++ error = parse_table(name, nlen, oldval, oldlenp,
+ newval, newlen, head->ctl_table,
+ &context);
+ kfree(context);
++
++ spin_lock(&sysctl_lock);
++ unuse_table(head);
+ if (error != -ENOTDIR)
+- return error;
+- tmp = tmp->next;
+- } while (tmp != &root_table_header.ctl_entry);
+- return -ENOTDIR;
++ break;
++ } while ((tmp = tmp->next) != &root_table_header.ctl_entry);
++ spin_unlock(&sysctl_lock);
++ return error;
+ }
+
+ asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
+@@ -1236,12 +1288,16 @@
+ return NULL;
+ tmp->ctl_table = table;
+ INIT_LIST_HEAD(&tmp->ctl_entry);
++ tmp->used = 0;
++ tmp->unregistering = NULL;
++ spin_lock(&sysctl_lock);
+ if (insert_at_head)
+ list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
+ else
+ list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
++ spin_unlock(&sysctl_lock);
+ #ifdef CONFIG_PROC_FS
+- register_proc_table(table, proc_sys_root);
++ register_proc_table(table, proc_sys_root, tmp);
+ #endif
+ return tmp;
+ }
+@@ -1255,10 +1311,13 @@
+ */
+ void unregister_sysctl_table(struct ctl_table_header * header)
+ {
+- list_del(&header->ctl_entry);
++ might_sleep();
++ spin_lock(&sysctl_lock);
++ start_unregistering(header);
+ #ifdef CONFIG_PROC_FS
+ unregister_proc_table(header->ctl_table, proc_sys_root);
+ #endif
++ spin_unlock(&sysctl_lock);
+ kfree(header);
+ }
+
+@@ -1269,7 +1328,7 @@
+ #ifdef CONFIG_PROC_FS
+
+ /* Scan the sysctl entries in table and add them all into /proc */
+-static void register_proc_table(ctl_table * table, struct proc_dir_entry *root)
++static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
+ {
+ struct proc_dir_entry *de;
+ int len;
+@@ -1305,13 +1364,14 @@
+ de = create_proc_entry(table->procname, mode, root);
+ if (!de)
+ continue;
++ de->set = set;
+ de->data = (void *) table;
+ if (table->proc_handler)
+ de->proc_fops = &proc_sys_file_operations;
+ }
+ table->de = de;
+ if (de->mode & S_IFDIR)
+- register_proc_table(table->child, de);
++ register_proc_table(table->child, de, set);
+ }
+ }
+
+@@ -1336,6 +1396,13 @@
+ continue;
+ }
+
++ /*
++ * In any case, mark the entry as goner; we'll keep it
++ * around if it's busy, but we'll know to do nothing with
++ * its fields. We are under sysctl_lock here.
++ */
++ de->data = NULL;
++
+ /* Don't unregister proc entries that are still being used.. */
+ if (atomic_read(&de->count))
+ continue;
+@@ -1349,27 +1416,38 @@
+ size_t count, loff_t *ppos)
+ {
+ int op;
+- struct proc_dir_entry *de;
++ struct proc_dir_entry *de = PDE(file->f_dentry->d_inode);
+ struct ctl_table *table;
+ size_t res;
+- ssize_t error;
+-
+- de = PDE(file->f_dentry->d_inode);
+- if (!de || !de->data)
+- return -ENOTDIR;
+- table = (struct ctl_table *) de->data;
+- if (!table || !table->proc_handler)
+- return -ENOTDIR;
+- op = (write ? 002 : 004);
+- if (ctl_perm(table, op))
+- return -EPERM;
++ ssize_t error = -ENOTDIR;
+
+- res = count;
+-
+- error = (*table->proc_handler) (table, write, file, buf, &res, ppos);
+- if (error)
+- return error;
+- return res;
++ spin_lock(&sysctl_lock);
++ if (de && de->data && use_table(de->set)) {
++ /*
++ * at that point we know that sysctl was not unregistered
++ * and won't be until we finish
++ */
++ spin_unlock(&sysctl_lock);
++ table = (struct ctl_table *) de->data;
++ if (!table || !table->proc_handler)
++ goto out;
++ error = -EPERM;
++ op = (write ? 002 : 004);
++ if (ctl_perm(table, op))
++ goto out;
++
++ /* careful: calling conventions are nasty here */
++ res = count;
++ error = (*table->proc_handler)(table, write, file,
++ buf, &res, ppos);
++ if (!error)
++ error = res;
++ out:
++ spin_lock(&sysctl_lock);
++ unuse_table(de->set);
++ }
++ spin_unlock(&sysctl_lock);
++ return error;
+ }
+
+ static int proc_opensys(struct inode *inode, struct file *file)
+_______________________________________________
+Security mailing list
+Security@linux.kernel.org
+http://linux.kernel.org/mailman/listinfo/security
+