From 8f69975df14878199a86f784fc24a0cca01fcc15 Mon Sep 17 00:00:00 2001 From: Ben Schweikert Date: Wed, 5 Aug 2009 10:39:24 +0200 Subject: [PATCH] Added missing SuSE-Xen-Patches. --- .../patches.apparmor/add-path_permission.diff | 201 + .../patches.apparmor/apparmor-2.6.25.diff | 47 + .../patches.apparmor/apparmor-audit.diff | 72 + .../patches.apparmor/apparmor-intree.diff | 31 + .../patches.apparmor/apparmor-lsm.diff | 910 + .../patches.apparmor/apparmor-main.diff | 1493 + .../patches.apparmor/apparmor-misc.diff | 1441 + .../apparmor-module_interface.diff | 1350 + .../patches.apparmor/apparmor-network.diff | 408 + .../apparmor-ptrace-2.6.27.diff | 55 + .../patches.apparmor/apparmor-rlimits.diff | 461 + .../patches.apparmor/d_namespace_path.diff | 60 + .../d_namespace_path_oops_fix.diff | 25 + .../do_path_lookup-nameidata.diff | 42 + .../patches.apparmor/file-handle-ops.diff | 84 + .../patches.apparmor/fix-complain.diff | 26 + .../patches.apparmor/fix-deleted-leak.diff | 25 + .../patches.apparmor/fix-security-param.diff | 66 + .../patches.apparmor/fix-vfs_rmdir.diff | 44 + .../patches.apparmor/fork-tracking.diff | 108 + .../fsetattr-reintro-ATTR_FILE.diff | 28 + .../fsetattr-restore-ia_file.diff | 58 + .../patches.apparmor/fsetattr.diff | 414 + .../patches.apparmor/remove_suid.diff | 41 + .../patches.apparmor/security-create.diff | 107 + .../patches.apparmor/security-getxattr.diff | 128 + .../patches.apparmor/security-link.diff | 149 + .../patches.apparmor/security-listxattr.diff | 105 + .../patches.apparmor/security-mkdir.diff | 106 + .../patches.apparmor/security-mknod.diff | 124 + .../patches.apparmor/security-readlink.diff | 104 + .../security-removexattr.diff | 143 + .../patches.apparmor/security-rename.diff | 160 + .../patches.apparmor/security-rmdir.diff | 127 + .../patches.apparmor/security-setattr.diff | 146 + .../patches.apparmor/security-setxattr.diff | 256 + .../patches.apparmor/security-symlink.diff | 105 + .../patches.apparmor/security-unlink.diff | 
132 + .../patches.apparmor/security-xattr-file.diff | 592 + .../patches.apparmor/sysctl-pathname.diff | 111 + .../unambiguous-__d_path.diff | 267 + .../patches.apparmor/vfs-getxattr.diff | 190 + .../patches.apparmor/vfs-link.diff | 91 + .../patches.apparmor/vfs-listxattr.diff | 101 + .../patches.apparmor/vfs-mkdir.diff | 137 + .../patches.apparmor/vfs-mknod.diff | 99 + .../patches.apparmor/vfs-notify_change.diff | 291 + .../patches.apparmor/vfs-removexattr.diff | 121 + .../patches.apparmor/vfs-rename.diff | 125 + .../patches.apparmor/vfs-rmdir.diff | 123 + .../patches.apparmor/vfs-setxattr.diff | 159 + .../patches.apparmor/vfs-symlink.diff | 123 + .../patches.apparmor/vfs-unlink.diff | 99 + .../ia64-node_mem_map-node_start_pfn.diff | 48 + .../ppc-axon-missing-msi-workaround-5.diff | 143 + .../ppc-dynamic-reconfiguration.diff | 215 + .../patches.arch/ppc-vmcoreinfo.diff | 41 + .../s390-04-06-cio-sac-update.diff | 28 + .../s390-07-01-zfcp-port-failed-message.diff | 73 + .../s390-07-02-zfcp-unchained-fsf.diff | 378 + .../patches.arch/s390-07-03-topology-fix.diff | 207 + .../s390-08-01-cio-fix-mp-mode.diff | 61 + .../s390-08-02-zfcp-gpn-align-fix.diff | 109 + .../s390-08-03-iucv-cpu-hotremove.diff | 83 + .../s390-08-04-compat-sigaltstack.diff | 31 + .../patches.arch/s390-09-04-topology.diff | 30 + .../fcoe-change-fcoe_sw-sg_tablesi.diff | 23 + .../fcoe-check-return-for-fc_set_m.diff | 24 + .../fcoe-fix-frame-length-validati.diff | 149 + ...oe-fix-incorrect-use-of-struct-module.diff | 51 + .../fcoe-improved-load-balancing-i.diff | 43 + .../fcoe-logoff-of-the-fabric-when.diff | 26 + .../fcoe-remove-warn_on-in-fc_set.diff | 46 + .../fcoe-user_mfs-is-never-used.diff | 34 + .../libfc-add-fc_disc-c-locking-co.diff | 29 + ...-for-err-when-recv-state-is-incorrect.diff | 218 + .../libfc-ensure-correct-device_pu.diff | 214 + .../libfc-handle-rrq-exch-timeout.diff | 27 + .../libfc-improve-fc_lport-c-locki.diff | 114 + .../libfc-improve-fc_rport-c-locki.diff | 50 + 
.../libfc-make-fc_disc-inline-with.diff | 217 + .../libfc-make-rscn-parsing-more-r.diff | 59 + .../libfc-make-sure-we-access-the.diff | 114 + .../libfc-pass-lport-in-exch_mgr_r.diff | 90 + .../libfc-set-the-release-function.diff | 91 + .../libfc-updated-comment-for-orde.diff | 56 + .../libfc-updated-libfc-fcoe-modul.diff | 35 + .../libfc-use-an-operations-struct.diff | 275 + .../libfc-when-rport-goes-away-re.diff | 41 + .../patches.drivers/libfc_locking.diff | 377 + .../patches.drivers/libfc_rport.diff | 265 + .../b43legacy-fix-led_device_naming.diff | 54 + .../ext2_mtime_update_on_rename.diff | 31 + .../patches.fixes/ext3_false_EIO_fix.diff | 172 + .../patches.fixes/ia64-sparse-fixes.diff | 54 + .../patches.fixes/ia64_uv_partition_id.diff | 35 + .../patches.fixes/ia64_uv_watchlist.diff | 78 + ...wlwifi-fix-iwl-3945_led_device_naming.diff | 57 + .../iwlwifi-fix-iwl-led_device_naming.diff | 54 + .../patches.fixes/kdb-kdump.diff | 60 + .../patches.fixes/kdb-oops-panic.diff | 65 + .../patches.fixes/kdb-read-CR.diff | 29 + .../patches.fixes/kdump-x86-sparsemem.diff | 41 + .../make-note_interrupt-fast.diff | 160 + .../patches.fixes/nfs-acl-caching.diff | 46 + .../patches.fixes/proc-scsi-scsi-fix.diff | 110 + .../rt2x00-fix-led_device_naming.diff | 44 + .../sd_liberal_28_sense_invalid.diff | 27 + .../uv-bios_call_memprotect.diff | 69 + .../patches.fixes/uv-bios_call_partition.diff | 155 + .../uv-bios_call_reserve_page.diff | 66 + .../patches.fixes/uv-bios_call_watchlist.diff | 100 + .../patches.fixes/uv-bios_common.diff | 280 + .../patches.fixes/uv-efi_bios.diff | 53 + .../patches.fixes/uv-sn_region_size.diff | 70 + .../patches.fixes/uv-sysfs.diff | 109 + .../uv-xp-change_memprotect.diff | 228 + .../patches.fixes/uv-xpc-get_sn_info.diff | 28 + .../uv-xpc_create_gru_mq_uv.diff | 388 + .../uv-xpc_get_part_rsvd_page.diff | 63 + .../patches.fixes/uv_setup_irq.diff | 239 + .../ipmi-section-conflict.diff | 82 + .../psmouse-section-conflict.diff | 47 + 
.../patches.rpmify/cloneconfig.diff | 37 + .../patches.suse/apm_setup_UP.diff | 57 + .../patches.suse/crasher-26.diff | 260 + ...ile-capabilities-add-file_caps-switch.diff | 106 + .../file-capabilities-disable-by-default.diff | 35 + .../patches.suse/fs-knows-MAY_APPEND.diff | 59 + .../patches.suse/fs-may_iops.diff | 144 + .../genksyms-add-override-flag.diff | 126 + .../patches.suse/genksyms-override.diff | 110 + .../patches.suse/genksyms-reference.diff | 484 + .../patches.suse/kdb-resolve-uv-conflict.diff | 224 + .../patches.suse/nfs4acl-ai.diff | 123 + .../patches.suse/nfs4acl-common.diff | 1770 + .../patches.suse/nfs4acl-ext3.diff | 906 + .../nfsacl-client-cache-CHECK.diff | 76 + .../novfs-map-drives-correctly.diff | 78 + .../patches.suse/novfs-merge-changes.diff | 333 + .../patches.suse/panic-on-io-nmi.diff | 116 + .../patches.suse/parser-match_string.diff | 55 + .../raw_device_max_minors_param.diff | 112 + .../reiserfs-add-reiserfs_error.diff | 64 + .../reiserfs-buffer-info-for-balance.diff | 122 + .../reiserfs-cleanup-path-funcs.diff | 290 + .../reiserfs-consistent-messages.diff | 80 + ...iserfs-eliminate-per-super-xattr-lock.diff | 573 + .../reiserfs-journaled-xattrs.diff | 422 + .../reiserfs-kill-xattr-readdir.diff | 582 + ...inode-xattr-locking-more-fine-grained.diff | 421 + .../reiserfs-rearrange-journal-abort.diff | 80 + .../reiserfs-reiserfs-warning.diff | 2341 + .../patches.suse/reiserfs-reiserfs_info.diff | 87 + .../patches.suse/reiserfs-reiserfs_panic.diff | 970 + .../reiserfs-remove-i_has_xattr_dir.diff | 51 + .../reiserfs-remove-link-detection.diff | 42 + .../patches.suse/reiserfs-rename-._.diff | 1991 + .../patches.suse/reiserfs-rename-p_._.diff | 1816 + .../patches.suse/reiserfs-rename-p_s_bh.diff | 489 + .../reiserfs-rename-p_s_inode.diff | 554 + .../patches.suse/reiserfs-rename-p_s_sb.diff | 2869 + .../patches.suse/reiserfs-rename-p_s_tb.diff | 1040 + .../patches.suse/reiserfs-selinux.diff | 316 + .../reiserfs-simplify-buffer-info.diff | 363 + 
...ify-xattr-internal-file-lookups-opens.diff | 474 + .../reiserfs-strip-whitespace.diff | 1335 + .../reiserfs-use-generic-xattr-handlers.diff | 1142 + .../reiserfs-use-reiserfs_error.diff | 512 + .../patches.suse/s390-Kerntypes.diff | 320 + .../patches.suse/s390-System.map.diff | 30 + .../usb_correct_config_ti_04b3_4543.diff | 26 + .../patches.trace/ftrace-framepointer.diff | 32 + .../patches.trace/s390-syscall-get-nr.diff | 281 + .../patches.xen/xen3-auto-arch-i386.diff | 280 + .../patches.xen/xen3-auto-arch-x86.diff | 328 + .../patches.xen/xen3-auto-arch-x86_64.diff | 248 + .../patches.xen/xen3-auto-common.diff | 4189 ++ .../xen3-auto-include-xen-interface.diff | 5161 ++ .../patches.xen/xen3-auto-xen-arch.diff | 46515 ++++++++++++++ .../patches.xen/xen3-auto-xen-drivers.diff | 52850 ++++++++++++++++ .../patches.xen/xen3-auto-xen-kconfig.diff | 808 + .../patches.xen/xen3-panic-on-io-nmi.diff | 66 + .../patches.xen/xen3-uv_setup_irq.diff | 28 + 184 files changed, 154619 insertions(+) create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/add-path_permission.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/apparmor-2.6.25.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/apparmor-audit.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/apparmor-intree.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/apparmor-lsm.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/apparmor-main.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/apparmor-misc.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/apparmor-module_interface.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/apparmor-network.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/apparmor-ptrace-2.6.27.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/apparmor-rlimits.diff create mode 100644 
src/patches/suse-2.6.27.25/patches.apparmor/d_namespace_path.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/d_namespace_path_oops_fix.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/do_path_lookup-nameidata.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/file-handle-ops.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/fix-complain.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/fix-deleted-leak.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/fix-security-param.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/fix-vfs_rmdir.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/fork-tracking.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/fsetattr-reintro-ATTR_FILE.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/fsetattr-restore-ia_file.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/fsetattr.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/remove_suid.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-create.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-getxattr.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-link.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-listxattr.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-mkdir.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-mknod.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-readlink.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-removexattr.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-rename.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-rmdir.diff create mode 100644 
src/patches/suse-2.6.27.25/patches.apparmor/security-setattr.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-setxattr.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-symlink.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-unlink.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/security-xattr-file.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/sysctl-pathname.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/unambiguous-__d_path.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/vfs-getxattr.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/vfs-link.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/vfs-listxattr.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/vfs-mkdir.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/vfs-mknod.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/vfs-notify_change.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/vfs-removexattr.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/vfs-rename.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/vfs-rmdir.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/vfs-setxattr.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/vfs-symlink.diff create mode 100644 src/patches/suse-2.6.27.25/patches.apparmor/vfs-unlink.diff create mode 100644 src/patches/suse-2.6.27.25/patches.arch/ia64-node_mem_map-node_start_pfn.diff create mode 100644 src/patches/suse-2.6.27.25/patches.arch/ppc-axon-missing-msi-workaround-5.diff create mode 100644 src/patches/suse-2.6.27.25/patches.arch/ppc-dynamic-reconfiguration.diff create mode 100644 src/patches/suse-2.6.27.25/patches.arch/ppc-vmcoreinfo.diff create mode 100644 
src/patches/suse-2.6.27.25/patches.arch/s390-04-06-cio-sac-update.diff create mode 100644 src/patches/suse-2.6.27.25/patches.arch/s390-07-01-zfcp-port-failed-message.diff create mode 100644 src/patches/suse-2.6.27.25/patches.arch/s390-07-02-zfcp-unchained-fsf.diff create mode 100644 src/patches/suse-2.6.27.25/patches.arch/s390-07-03-topology-fix.diff create mode 100644 src/patches/suse-2.6.27.25/patches.arch/s390-08-01-cio-fix-mp-mode.diff create mode 100644 src/patches/suse-2.6.27.25/patches.arch/s390-08-02-zfcp-gpn-align-fix.diff create mode 100644 src/patches/suse-2.6.27.25/patches.arch/s390-08-03-iucv-cpu-hotremove.diff create mode 100644 src/patches/suse-2.6.27.25/patches.arch/s390-08-04-compat-sigaltstack.diff create mode 100644 src/patches/suse-2.6.27.25/patches.arch/s390-09-04-topology.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/fcoe-change-fcoe_sw-sg_tablesi.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/fcoe-check-return-for-fc_set_m.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/fcoe-fix-frame-length-validati.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/fcoe-fix-incorrect-use-of-struct-module.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/fcoe-improved-load-balancing-i.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/fcoe-logoff-of-the-fabric-when.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/fcoe-remove-warn_on-in-fc_set.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/fcoe-user_mfs-is-never-used.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-add-fc_disc-c-locking-co.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-check-for-err-when-recv-state-is-incorrect.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-ensure-correct-device_pu.diff create mode 100644 
src/patches/suse-2.6.27.25/patches.drivers/libfc-handle-rrq-exch-timeout.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-improve-fc_lport-c-locki.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-improve-fc_rport-c-locki.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-make-fc_disc-inline-with.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-make-rscn-parsing-more-r.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-make-sure-we-access-the.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-pass-lport-in-exch_mgr_r.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-set-the-release-function.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-updated-comment-for-orde.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-updated-libfc-fcoe-modul.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-use-an-operations-struct.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc-when-rport-goes-away-re.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc_locking.diff create mode 100644 src/patches/suse-2.6.27.25/patches.drivers/libfc_rport.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/b43legacy-fix-led_device_naming.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/ext2_mtime_update_on_rename.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/ext3_false_EIO_fix.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/ia64-sparse-fixes.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/ia64_uv_partition_id.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/ia64_uv_watchlist.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/iwlwifi-fix-iwl-3945_led_device_naming.diff create mode 100644 
src/patches/suse-2.6.27.25/patches.fixes/iwlwifi-fix-iwl-led_device_naming.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/kdb-kdump.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/kdb-oops-panic.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/kdb-read-CR.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/kdump-x86-sparsemem.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/make-note_interrupt-fast.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/nfs-acl-caching.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/proc-scsi-scsi-fix.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/rt2x00-fix-led_device_naming.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/sd_liberal_28_sense_invalid.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_memprotect.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_partition.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_reserve_page.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_watchlist.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv-bios_common.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv-efi_bios.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv-sn_region_size.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv-sysfs.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv-xp-change_memprotect.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv-xpc-get_sn_info.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv-xpc_create_gru_mq_uv.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv-xpc_get_part_rsvd_page.diff create mode 100644 src/patches/suse-2.6.27.25/patches.fixes/uv_setup_irq.diff create mode 100644 
src/patches/suse-2.6.27.25/patches.kernel.org/ipmi-section-conflict.diff create mode 100644 src/patches/suse-2.6.27.25/patches.kernel.org/psmouse-section-conflict.diff create mode 100644 src/patches/suse-2.6.27.25/patches.rpmify/cloneconfig.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/apm_setup_UP.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/crasher-26.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/file-capabilities-add-file_caps-switch.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/file-capabilities-disable-by-default.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/fs-knows-MAY_APPEND.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/fs-may_iops.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/genksyms-add-override-flag.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/genksyms-override.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/genksyms-reference.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/kdb-resolve-uv-conflict.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/nfs4acl-ai.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/nfs4acl-common.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/nfs4acl-ext3.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/nfsacl-client-cache-CHECK.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/novfs-map-drives-correctly.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/novfs-merge-changes.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/panic-on-io-nmi.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/parser-match_string.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/raw_device_max_minors_param.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-add-reiserfs_error.diff create mode 100644 
src/patches/suse-2.6.27.25/patches.suse/reiserfs-buffer-info-for-balance.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-cleanup-path-funcs.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-consistent-messages.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-eliminate-per-super-xattr-lock.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-journaled-xattrs.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-kill-xattr-readdir.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-make-per-inode-xattr-locking-more-fine-grained.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-rearrange-journal-abort.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-reiserfs-warning.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-reiserfs_info.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-reiserfs_panic.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-remove-i_has_xattr_dir.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-remove-link-detection.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-._.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_._.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_bh.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_inode.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_sb.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_tb.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-selinux.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-simplify-buffer-info.diff create mode 100644 
src/patches/suse-2.6.27.25/patches.suse/reiserfs-simplify-xattr-internal-file-lookups-opens.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-strip-whitespace.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-use-generic-xattr-handlers.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/reiserfs-use-reiserfs_error.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/s390-Kerntypes.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/s390-System.map.diff create mode 100644 src/patches/suse-2.6.27.25/patches.suse/usb_correct_config_ti_04b3_4543.diff create mode 100644 src/patches/suse-2.6.27.25/patches.trace/ftrace-framepointer.diff create mode 100644 src/patches/suse-2.6.27.25/patches.trace/s390-syscall-get-nr.diff create mode 100644 src/patches/suse-2.6.27.25/patches.xen/xen3-auto-arch-i386.diff create mode 100644 src/patches/suse-2.6.27.25/patches.xen/xen3-auto-arch-x86.diff create mode 100644 src/patches/suse-2.6.27.25/patches.xen/xen3-auto-arch-x86_64.diff create mode 100644 src/patches/suse-2.6.27.25/patches.xen/xen3-auto-common.diff create mode 100644 src/patches/suse-2.6.27.25/patches.xen/xen3-auto-include-xen-interface.diff create mode 100644 src/patches/suse-2.6.27.25/patches.xen/xen3-auto-xen-arch.diff create mode 100644 src/patches/suse-2.6.27.25/patches.xen/xen3-auto-xen-drivers.diff create mode 100644 src/patches/suse-2.6.27.25/patches.xen/xen3-auto-xen-kconfig.diff create mode 100644 src/patches/suse-2.6.27.25/patches.xen/xen3-panic-on-io-nmi.diff create mode 100644 src/patches/suse-2.6.27.25/patches.xen/xen3-uv_setup_irq.diff diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/add-path_permission.diff b/src/patches/suse-2.6.27.25/patches.apparmor/add-path_permission.diff new file mode 100644 index 0000000000..0eb796d795 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/add-path_permission.diff @@ -0,0 +1,201 @@ +From: Jeff Mahoney +Subject: [PATCH] vfs: 
introduce path_permission() + + 2.6.27 eliminated the nameidata parameter from permission and replaced + several call sites with inode_permission. This keeps the information + required by AppArmor from reaching it. + + The following patch factors out the permission assessment part of + inode_permission into __inode_permission and adds a path_permission + function that takes a struct path instead of a struct inode and passes + it to security_path_permission instead of security_inode_permission. + + All of the call sites that had access to a struct path whether by + itself or via a file or nameidata (and used it) in 2.6.26 are changed + to use the path_permission call. + +Signed-off-by: Jeff Mahoney +--- + fs/inotify_user.c | 2 +- + fs/namei.c | 32 ++++++++++++++++++++++++-------- + fs/open.c | 10 +++++----- + include/linux/fs.h | 5 +++++ + 4 files changed, 35 insertions(+), 14 deletions(-) + +--- a/fs/inotify_user.c ++++ b/fs/inotify_user.c +@@ -372,7 +372,7 @@ static int find_inode(const char __user + if (error) + return error; + /* you can only watch an inode if you have read permissions on it */ +- error = inode_permission(path->dentry->d_inode, MAY_READ); ++ error = path_permission(path, MAY_READ); + if (error) + path_put(path); + return error; +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -227,7 +227,7 @@ int generic_permission(struct inode *ino + return -EACCES; + } + +-int inode_permission(struct inode *inode, int mask) ++static int __inode_permission(struct inode *inode, int mask) + { + int retval; + int submask = mask; +@@ -273,7 +273,12 @@ int inode_permission(struct inode *inode + if (retval) + return retval; + +- retval = devcgroup_inode_permission(inode, mask); ++ return devcgroup_inode_permission(inode, mask); ++} ++ ++int inode_permission(struct inode *inode, int mask) ++{ ++ int retval = __inode_permission(inode, mask); + if (retval) + return retval; + +@@ -281,6 +286,15 @@ int inode_permission(struct inode *inode + mask & 
(MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND)); + } + ++int path_permission(struct path *path, int mask) ++{ ++ int retval = __inode_permission(path->dentry->d_inode, mask); ++ if (retval) ++ return retval; ++ return security_path_permission(path, ++ mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND)); ++} ++ + /** + * vfs_permission - check for access rights to a given path + * @nd: lookup result that describes the path +@@ -293,7 +307,7 @@ int inode_permission(struct inode *inode + */ + int vfs_permission(struct nameidata *nd, int mask) + { +- return inode_permission(nd->path.dentry->d_inode, mask); ++ return path_permission(&nd->path, mask); + } + + /** +@@ -310,7 +324,7 @@ int vfs_permission(struct nameidata *nd, + */ + int file_permission(struct file *file, int mask) + { +- return inode_permission(file->f_path.dentry->d_inode, mask); ++ return path_permission(&file->f_path, mask); + } + + /* +@@ -452,8 +466,9 @@ static struct dentry * cached_lookup(str + * short-cut DAC fails, then call permission() to do more + * complete permission check. 
+ */ +-static int exec_permission_lite(struct inode *inode) ++static int exec_permission_lite(struct path *path) + { ++ struct inode *inode = path->dentry->d_inode; + umode_t mode = inode->i_mode; + + if (inode->i_op && inode->i_op->permission) +@@ -478,7 +493,7 @@ static int exec_permission_lite(struct i + + return -EACCES; + ok: +- return security_inode_permission(inode, MAY_EXEC); ++ return security_path_permission(path, MAY_EXEC); + } + + /* +@@ -875,7 +890,7 @@ static int __link_path_walk(const char * + unsigned int c; + + nd->flags |= LOOKUP_CONTINUE; +- err = exec_permission_lite(inode); ++ err = exec_permission_lite(&nd->path); + if (err == -EAGAIN) + err = vfs_permission(nd, MAY_EXEC); + if (err) +@@ -1250,7 +1265,7 @@ static struct dentry *lookup_hash(struct + { + int err; + +- err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC); ++ err = path_permission(&nd->path, MAY_EXEC); + if (err) + return ERR_PTR(err); + return __lookup_hash(&nd->last, nd->path.dentry, nd); +@@ -2907,6 +2922,7 @@ EXPORT_SYMBOL(page_symlink_inode_operati + EXPORT_SYMBOL(path_lookup); + EXPORT_SYMBOL(vfs_path_lookup); + EXPORT_SYMBOL(inode_permission); ++EXPORT_SYMBOL(path_permission); + EXPORT_SYMBOL(vfs_permission); + EXPORT_SYMBOL(file_permission); + EXPORT_SYMBOL(unlock_rename); +--- a/fs/open.c ++++ b/fs/open.c +@@ -248,7 +248,7 @@ static long do_sys_truncate(const char _ + if (error) + goto dput_and_out; + +- error = inode_permission(inode, MAY_WRITE); ++ error = path_permission(&path, MAY_WRITE); + if (error) + goto mnt_drop_write_and_out; + +@@ -493,7 +493,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, con + goto out_path_release; + } + +- res = inode_permission(inode, mode | MAY_ACCESS); ++ res = path_permission(&path, mode | MAY_ACCESS); + /* SuS v2 requires we report a read only fs too */ + if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) + goto out_path_release; +@@ -536,7 +536,7 @@ SYSCALL_DEFINE1(chdir, const char __user + if (error) + goto out; + +- error 
= inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); ++ error = path_permission(&path, MAY_EXEC | MAY_ACCESS); + if (error) + goto dput_and_out; + +@@ -565,7 +565,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd + if (!S_ISDIR(inode->i_mode)) + goto out_putf; + +- error = inode_permission(inode, MAY_EXEC | MAY_ACCESS); ++ error = path_permission(&file->f_path, MAY_EXEC | MAY_ACCESS); + if (!error) + set_fs_pwd(current->fs, &file->f_path); + out_putf: +@@ -583,7 +583,7 @@ SYSCALL_DEFINE1(chroot, const char __use + if (error) + goto out; + +- error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); ++ error = path_permission(&path, MAY_EXEC | MAY_ACCESS); + if (error) + goto dput_and_out; + +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1201,6 +1201,11 @@ extern void dentry_unhash(struct dentry + extern int file_permission(struct file *, int); + + /* ++ * VFS path helper functions. ++ */ ++extern int path_permission(struct path *, int); ++ ++/* + * File types + * + * NOTE! These match bits 12..15 of stat.st_mode diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-2.6.25.diff b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-2.6.25.diff new file mode 100644 index 0000000000..56bf22a154 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-2.6.25.diff @@ -0,0 +1,47 @@ +From: John Johansen +Subject: AppArmor: Patch AppArmor for 2.6.25 kernel + +Add 64 bit capabilities support to AppArmor. 
+ +Signed-off-by: John Johansen + +--- + security/apparmor/module_interface.c | 22 ++++++++++++++++++---- + 1 file changed, 18 insertions(+), 4 deletions(-) + +--- a/security/apparmor/module_interface.c ++++ b/security/apparmor/module_interface.c +@@ -395,15 +395,29 @@ static struct aa_profile *aa_unpack_prof + if (!aa_is_nameX(e, AA_STRUCTEND, NULL)) + goto fail; + +- if (!aa_is_u32(e, &(profile->capabilities), NULL)) ++ if (!aa_is_u32(e, &(profile->capabilities.cap[0]), NULL)) + goto fail; +- if (!aa_is_u32(e, &(profile->audit_caps), NULL)) ++ if (!aa_is_u32(e, &(profile->audit_caps.cap[0]), NULL)) + goto fail; +- if (!aa_is_u32(e, &(profile->quiet_caps), NULL)) ++ if (!aa_is_u32(e, &(profile->quiet_caps.cap[0]), NULL)) + goto fail; +- if (!aa_is_u32(e, &(profile->set_caps), NULL)) ++ if (!aa_is_u32(e, &(profile->set_caps.cap[0]), NULL)) + goto fail; + ++ if (aa_is_nameX(e, AA_STRUCT, "caps64")) { ++ /* optional upper half of 64 bit caps */ ++ if (!aa_is_u32(e, &(profile->capabilities.cap[1]), NULL)) ++ goto fail; ++ if (!aa_is_u32(e, &(profile->audit_caps.cap[1]), NULL)) ++ goto fail; ++ if (!aa_is_u32(e, &(profile->quiet_caps.cap[1]), NULL)) ++ goto fail; ++ if (!aa_is_u32(e, &(profile->set_caps.cap[1]), NULL)) ++ goto fail; ++ if (!aa_is_nameX(e, AA_STRUCTEND, NULL)) ++ goto fail; ++ } ++ + if (!aa_unpack_rlimits(e, profile)) + goto fail; + diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-audit.diff b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-audit.diff new file mode 100644 index 0000000000..37ee484063 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-audit.diff @@ -0,0 +1,72 @@ +From: Tony Jones +Subject: Export audit subsystem for use by modules + +Update kenel audit range comments to show AppArmor's registered range of +1500-1599. This range used to be reserved for LSPP but LSPP uses the +SE Linux range and the range was given to AppArmor. +Adds necessary export symbols for audit subsystem routines. 
+Changes audit_log_vformat to be externally visible (analagous to vprintf) +Patch is not in mainline -- pending AppArmor code submission to lkml + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + include/linux/audit.h | 12 +++++++++++- + kernel/audit.c | 6 ++++-- + 2 files changed, 15 insertions(+), 3 deletions(-) + +--- a/include/linux/audit.h ++++ b/include/linux/audit.h +@@ -33,7 +33,7 @@ + * 1200 - 1299 messages internal to the audit daemon + * 1300 - 1399 audit event messages + * 1400 - 1499 SE Linux use +- * 1500 - 1599 kernel LSPP events ++ * 1500 - 1599 AppArmor use + * 1600 - 1699 kernel crypto events + * 1700 - 1799 kernel anomaly records + * 1800 - 1999 future kernel use (maybe integrity labels and related events) +@@ -119,6 +119,13 @@ + #define AUDIT_MAC_UNLBL_STCADD 1416 /* NetLabel: add a static label */ + #define AUDIT_MAC_UNLBL_STCDEL 1417 /* NetLabel: del a static label */ + ++#define AUDIT_APPARMOR_AUDIT 1501 /* AppArmor audited grants */ ++#define AUDIT_APPARMOR_ALLOWED 1502 /* Allowed Access for learning */ ++#define AUDIT_APPARMOR_DENIED 1503 ++#define AUDIT_APPARMOR_HINT 1504 /* Process Tracking information */ ++#define AUDIT_APPARMOR_STATUS 1505 /* Changes in config */ ++#define AUDIT_APPARMOR_ERROR 1506 /* Internal AppArmor Errors */ ++ + #define AUDIT_FIRST_KERN_ANOM_MSG 1700 + #define AUDIT_LAST_KERN_ANOM_MSG 1799 + #define AUDIT_ANOM_PROMISCUOUS 1700 /* Device changed promiscuous mode */ +@@ -545,6 +552,9 @@ extern void audit_log(struct audit_ + __attribute__((format(printf,4,5))); + + extern struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type); ++extern void audit_log_vformat(struct audit_buffer *ab, ++ const char *fmt, va_list args) ++ __attribute__((format(printf,2,0))); + extern void audit_log_format(struct audit_buffer *ab, + const char *fmt, ...) 
+ __attribute__((format(printf,2,3))); +--- a/kernel/audit.c ++++ b/kernel/audit.c +@@ -1231,8 +1231,7 @@ static inline int audit_expand(struct au + * will be called a second time. Currently, we assume that a printk + * can't format message larger than 1024 bytes, so we don't either. + */ +-static void audit_log_vformat(struct audit_buffer *ab, const char *fmt, +- va_list args) ++void audit_log_vformat(struct audit_buffer *ab, const char *fmt, va_list args) + { + int len, avail; + struct sk_buff *skb; +@@ -1506,3 +1505,6 @@ EXPORT_SYMBOL(audit_log_start); + EXPORT_SYMBOL(audit_log_end); + EXPORT_SYMBOL(audit_log_format); + EXPORT_SYMBOL(audit_log); ++EXPORT_SYMBOL_GPL(audit_log_vformat); ++EXPORT_SYMBOL_GPL(audit_log_untrustedstring); ++EXPORT_SYMBOL_GPL(audit_log_d_path); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-intree.diff b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-intree.diff new file mode 100644 index 0000000000..c50c4f6a2d --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-intree.diff @@ -0,0 +1,31 @@ +From: John Johansen +Subject: Add AppArmor LSM to security/Makefile + +Signed-off-by: John Johansen +Signed-off-by: Andreas Gruenbacher + +--- + security/Kconfig | 1 + + security/Makefile | 3 ++- + 2 files changed, 3 insertions(+), 1 deletion(-) + +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -117,6 +117,7 @@ config SECURITY_DEFAULT_MMAP_MIN_ADDR + + source security/selinux/Kconfig + source security/smack/Kconfig ++source security/apparmor/Kconfig + + endmenu + +--- a/security/Makefile ++++ b/security/Makefile +@@ -14,5 +14,6 @@ obj-$(CONFIG_SECURITY) += security.o c + # Must precede capability.o in order to stack properly. 
+ obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o + obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o +-obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o ++obj-$(CONFIG_SECURITY_APPARMOR) += commoncap.o apparmor/ ++ obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o + obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-lsm.diff b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-lsm.diff new file mode 100644 index 0000000000..410099a7ff --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-lsm.diff @@ -0,0 +1,910 @@ +From: John Johansen +Subject: AppArmor: Module and LSM hooks + +Module parameters, LSM hooks, initialization and teardown. + +Signed-off-by: John Johansen +Signed-off-by: Andreas Gruenbacher + +--- + security/apparmor/lsm.c | 895 ++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 895 insertions(+) + +--- /dev/null ++++ b/security/apparmor/lsm.c +@@ -0,0 +1,895 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ * ++ * AppArmor LSM interface ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "apparmor.h" ++#include "inline.h" ++ ++/* Flag indicating whether initialization completed */ ++int apparmor_initialized = 0; ++ ++static int param_set_aabool(const char *val, struct kernel_param *kp); ++static int param_get_aabool(char *buffer, struct kernel_param *kp); ++#define param_check_aabool(name, p) __param_check(name, p, int) ++ ++static int param_set_aauint(const char *val, struct kernel_param *kp); ++static int param_get_aauint(char *buffer, struct kernel_param *kp); ++#define param_check_aauint(name, p) __param_check(name, p, int) ++ ++/* Flag values, also controllable via /sys/module/apparmor/parameters ++ * We define special types as we want to do additional mediation. ++ * ++ * Complain mode -- in complain mode access failures result in auditing only ++ * and task is allowed access. audit events are processed by userspace to ++ * generate policy. Default is 'enforce' (0). ++ * Value is also togglable per profile and referenced when global value is ++ * enforce. 
++ */ ++int apparmor_complain = 0; ++module_param_named(complain, apparmor_complain, aabool, S_IRUSR | S_IWUSR); ++MODULE_PARM_DESC(apparmor_complain, "Toggle AppArmor complain mode"); ++ ++/* Debug mode */ ++int apparmor_debug = 0; ++module_param_named(debug, apparmor_debug, aabool, S_IRUSR | S_IWUSR); ++MODULE_PARM_DESC(apparmor_debug, "Toggle AppArmor debug mode"); ++ ++/* Audit mode */ ++int apparmor_audit = 0; ++module_param_named(audit, apparmor_audit, aabool, S_IRUSR | S_IWUSR); ++MODULE_PARM_DESC(apparmor_audit, "Toggle AppArmor audit mode"); ++ ++/* Syscall logging mode */ ++int apparmor_logsyscall = 0; ++module_param_named(logsyscall, apparmor_logsyscall, aabool, S_IRUSR | S_IWUSR); ++MODULE_PARM_DESC(apparmor_logsyscall, "Toggle AppArmor logsyscall mode"); ++ ++/* Maximum pathname length before accesses will start getting rejected */ ++unsigned int apparmor_path_max = 2 * PATH_MAX; ++module_param_named(path_max, apparmor_path_max, aauint, S_IRUSR | S_IWUSR); ++MODULE_PARM_DESC(apparmor_path_max, "Maximum pathname length allowed"); ++ ++/* Boot time disable flag */ ++#ifdef CONFIG_SECURITY_APPARMOR_DISABLE ++#define AA_ENABLED_PERMS 0600 ++#else ++#define AA_ENABLED_PERMS 0400 ++#endif ++static int param_set_aa_enabled(const char *val, struct kernel_param *kp); ++unsigned int apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE; ++module_param_call(enabled, param_set_aa_enabled, param_get_aauint, ++ &apparmor_enabled, AA_ENABLED_PERMS); ++MODULE_PARM_DESC(apparmor_enabled, "Enable/Disable Apparmor on boot"); ++ ++static int __init apparmor_enabled_setup(char *str) ++{ ++ apparmor_enabled = simple_strtol(str, NULL, 0); ++ return 1; ++} ++__setup("apparmor=", apparmor_enabled_setup); ++ ++static int param_set_aabool(const char *val, struct kernel_param *kp) ++{ ++ if (aa_task_context(current)) ++ return -EPERM; ++ return param_set_bool(val, kp); ++} ++ ++static int param_get_aabool(char *buffer, struct kernel_param *kp) ++{ ++ if 
(aa_task_context(current)) ++ return -EPERM; ++ return param_get_bool(buffer, kp); ++} ++ ++static int param_set_aauint(const char *val, struct kernel_param *kp) ++{ ++ if (aa_task_context(current)) ++ return -EPERM; ++ return param_set_uint(val, kp); ++} ++ ++static int param_get_aauint(char *buffer, struct kernel_param *kp) ++{ ++ if (aa_task_context(current)) ++ return -EPERM; ++ return param_get_uint(buffer, kp); ++} ++ ++/* allow run time disabling of apparmor */ ++static int param_set_aa_enabled(const char *val, struct kernel_param *kp) ++{ ++ char *endp; ++ unsigned long l; ++ ++ if (!apparmor_initialized) { ++ apparmor_enabled = 0; ++ return 0; ++ } ++ ++ if (aa_task_context(current)) ++ return -EPERM; ++ ++ if (!apparmor_enabled) ++ return -EINVAL; ++ ++ if (!val) ++ return -EINVAL; ++ ++ l = simple_strtoul(val, &endp, 0); ++ if (endp == val || l != 0) ++ return -EINVAL; ++ ++ apparmor_enabled = 0; ++ apparmor_disable(); ++ return 0; ++} ++ ++static int aa_reject_syscall(struct task_struct *task, gfp_t flags, ++ const char *name) ++{ ++ struct aa_profile *profile = aa_get_profile(task); ++ int error = 0; ++ ++ if (profile) { ++ error = aa_audit_syscallreject(profile, flags, name); ++ aa_put_profile(profile); ++ } ++ ++ return error; ++} ++ ++static int apparmor_ptrace(struct task_struct *parent, ++ struct task_struct *child, unsigned int mode) ++{ ++ struct aa_task_context *cxt; ++ int error = 0; ++ ++ /* ++ * parent can ptrace child when ++ * - parent is unconfined ++ * - parent & child are in the same namespace && ++ * - parent is in complain mode ++ * - parent and child are confined by the same profile ++ * - parent profile has CAP_SYS_PTRACE ++ */ ++ ++ rcu_read_lock(); ++ cxt = aa_task_context(parent); ++ if (cxt) { ++ if (parent->nsproxy != child->nsproxy) { ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "ptrace"; ++ sa.gfp_mask = GFP_ATOMIC; ++ sa.parent = parent->pid; ++ sa.task = child->pid; ++ sa.info = "different 
namespaces"; ++ aa_audit_reject(cxt->profile, &sa); ++ error = -EPERM; ++ } else { ++ struct aa_task_context *child_cxt = ++ aa_task_context(child); ++ ++ error = aa_may_ptrace(cxt, child_cxt ? ++ child_cxt->profile : NULL); ++ if (PROFILE_COMPLAIN(cxt->profile)) { ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "ptrace"; ++ sa.gfp_mask = GFP_ATOMIC; ++ sa.parent = parent->pid; ++ sa.task = child->pid; ++ aa_audit_hint(cxt->profile, &sa); ++ } ++ } ++ } ++ rcu_read_unlock(); ++ ++ return error; ++} ++ ++static int apparmor_capable(struct task_struct *task, int cap) ++{ ++ int error; ++ struct aa_task_context *cxt; ++ ++ /* cap_capable returns 0 on success, else -EPERM */ ++ error = cap_capable(task, cap); ++ ++ rcu_read_lock(); ++ cxt = aa_task_context(task); ++ if (cxt && (!error || cap_raised(cxt->profile->set_caps, cap))) ++ error = aa_capability(cxt, cap); ++ rcu_read_unlock(); ++ ++ return error; ++} ++ ++static int apparmor_sysctl(struct ctl_table *table, int op) ++{ ++ struct aa_profile *profile = aa_get_profile(current); ++ int error = 0; ++ ++ if (profile) { ++ char *buffer, *name; ++ int mask; ++ ++ mask = 0; ++ if (op & 4) ++ mask |= MAY_READ; ++ if (op & 2) ++ mask |= MAY_WRITE; ++ ++ error = -ENOMEM; ++ buffer = (char*)__get_free_page(GFP_KERNEL); ++ if (!buffer) ++ goto out; ++ name = sysctl_pathname(table, buffer, PAGE_SIZE); ++ if (name && name - buffer >= 5) { ++ name -= 5; ++ memcpy(name, "/proc", 5); ++ error = aa_perm_path(profile, "sysctl", name, mask, 0); ++ } ++ free_page((unsigned long)buffer); ++ } ++ ++out: ++ aa_put_profile(profile); ++ return error; ++} ++ ++static int apparmor_bprm_set_security(struct linux_binprm *bprm) ++{ ++ /* handle capability bits with setuid, etc */ ++ cap_bprm_set_security(bprm); ++ /* already set based on script name */ ++ if (bprm->sh_bang) ++ return 0; ++ return aa_register(bprm); ++} ++ ++static int apparmor_bprm_secureexec(struct linux_binprm *bprm) ++{ ++ int ret = 
cap_bprm_secureexec(bprm); ++ ++ if (!ret && (unsigned long)bprm->security & AA_SECURE_EXEC_NEEDED) { ++ AA_DEBUG("%s: secureexec required for %s\n", ++ __FUNCTION__, bprm->filename); ++ ret = 1; ++ } ++ ++ return ret; ++} ++ ++static int apparmor_sb_mount(char *dev_name, struct path *path, char *type, ++ unsigned long flags, void *data) ++{ ++ return aa_reject_syscall(current, GFP_KERNEL, "mount"); ++} ++ ++static int apparmor_umount(struct vfsmount *mnt, int flags) ++{ ++ return aa_reject_syscall(current, GFP_KERNEL, "umount"); ++} ++ ++static int apparmor_inode_mkdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mask) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ if (!mnt || !mediated_filesystem(dir)) ++ goto out; ++ ++ profile = aa_get_profile(current); ++ ++ if (profile) ++ error = aa_perm_dir(profile, "inode_mkdir", dentry, mnt, ++ MAY_WRITE); ++ ++ aa_put_profile(profile); ++ ++out: ++ return error; ++} ++ ++static int apparmor_inode_rmdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ if (!mnt || !mediated_filesystem(dir)) ++ goto out; ++ ++ profile = aa_get_profile(current); ++ ++ if (profile) ++ error = aa_perm_dir(profile, "inode_rmdir", dentry, mnt, ++ MAY_WRITE); ++ ++ aa_put_profile(profile); ++ ++out: ++ return error; ++} ++ ++static int aa_permission(const char *operation, struct inode *inode, ++ struct dentry *dentry, struct vfsmount *mnt, ++ int mask, int check) ++{ ++ int error = 0; ++ ++ if (mnt && mediated_filesystem(inode)) { ++ struct aa_profile *profile; ++ ++ profile = aa_get_profile(current); ++ if (profile) ++ error = aa_perm(profile, operation, dentry, mnt, mask, ++ check); ++ aa_put_profile(profile); ++ } ++ return error; ++} ++ ++static inline int aa_mask_permissions(int mask) ++{ ++ if (mask & MAY_APPEND) ++ mask &= (MAY_READ | MAY_APPEND | MAY_EXEC); ++ else ++ mask &= (MAY_READ | MAY_WRITE | MAY_EXEC); ++ return mask; 
++} ++ ++static int apparmor_inode_create(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mask) ++{ ++ return aa_permission("inode_create", dir, dentry, mnt, MAY_APPEND, 0); ++} ++ ++static int apparmor_inode_link(struct dentry *old_dentry, ++ struct vfsmount *old_mnt, struct inode *dir, ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) ++{ ++ int error = 0; ++ struct aa_profile *profile; ++ ++ if (!old_mnt || !new_mnt || !mediated_filesystem(dir)) ++ goto out; ++ ++ profile = aa_get_profile(current); ++ ++ if (profile) ++ error = aa_link(profile, new_dentry, new_mnt, ++ old_dentry, old_mnt); ++ ++ aa_put_profile(profile); ++ ++out: ++ return error; ++} ++ ++static int apparmor_inode_unlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) ++{ ++ int check = 0; ++ ++ if (S_ISDIR(dentry->d_inode->i_mode)) ++ check |= AA_CHECK_DIR; ++ return aa_permission("inode_unlink", dir, dentry, mnt, MAY_WRITE, ++ check); ++} ++ ++static int apparmor_inode_symlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, const char *old_name) ++{ ++ return aa_permission("inode_symlink", dir, dentry, mnt, MAY_WRITE, 0); ++} ++ ++static int apparmor_inode_mknod(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode, dev_t dev) ++{ ++ return aa_permission("inode_mknod", dir, dentry, mnt, MAY_WRITE, 0); ++} ++ ++static int apparmor_inode_rename(struct inode *old_dir, ++ struct dentry *old_dentry, ++ struct vfsmount *old_mnt, ++ struct inode *new_dir, ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ if ((!old_mnt && !new_mnt) || !mediated_filesystem(old_dir)) ++ goto out; ++ ++ profile = aa_get_profile(current); ++ ++ if (profile) { ++ struct inode *inode = old_dentry->d_inode; ++ int check = 0; ++ ++ if (inode && S_ISDIR(inode->i_mode)) ++ check |= AA_CHECK_DIR; ++ if (old_mnt) ++ error = aa_perm(profile, "inode_rename", old_dentry, ++ 
old_mnt, MAY_READ | MAY_WRITE, check); ++ ++ if (!error && new_mnt) { ++ error = aa_perm(profile, "inode_rename", new_dentry, ++ new_mnt, MAY_WRITE, check); ++ } ++ } ++ ++ aa_put_profile(profile); ++ ++out: ++ return error; ++} ++ ++static int apparmor_inode_permission(struct inode *inode, int mask, ++ struct nameidata *nd) ++{ ++ int check = 0; ++ ++ if (!nd || nd->flags & (LOOKUP_PARENT | LOOKUP_CONTINUE)) ++ return 0; ++ mask = aa_mask_permissions(mask); ++ if (S_ISDIR(inode->i_mode)) { ++ check |= AA_CHECK_DIR; ++ /* allow traverse accesses to directories */ ++ mask &= ~MAY_EXEC; ++ } ++ return aa_permission("inode_permission", inode, nd->dentry, nd->mnt, ++ mask, check); ++} ++ ++static int apparmor_inode_setattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *iattr) ++{ ++ int error = 0; ++ ++ if (!mnt) ++ goto out; ++ ++ if (mediated_filesystem(dentry->d_inode)) { ++ struct aa_profile *profile; ++ ++ profile = aa_get_profile(current); ++ /* ++ * Mediate any attempt to change attributes of a file ++ * (chmod, chown, chgrp, etc) ++ */ ++ if (profile) ++ error = aa_attr(profile, dentry, mnt, iattr); ++ ++ aa_put_profile(profile); ++ } ++ ++out: ++ return error; ++} ++ ++static int aa_xattr_permission(struct dentry *dentry, struct vfsmount *mnt, ++ const char *operation, int mask, ++ struct file *file) ++{ ++ int error = 0; ++ ++ if (mnt && mediated_filesystem(dentry->d_inode)) { ++ struct aa_profile *profile = aa_get_profile(current); ++ int check = file ? 
AA_CHECK_FD : 0; ++ ++ if (profile) ++ error = aa_perm_xattr(profile, operation, dentry, mnt, ++ mask, check); ++ aa_put_profile(profile); ++ } ++ ++ return error; ++} ++ ++static int apparmor_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name, const void *value, ++ size_t size, int flags, struct file *file) ++{ ++ int error = cap_inode_setxattr(dentry, mnt, name, value, size, flags, ++ file); ++ ++ if (!error) ++ error = aa_xattr_permission(dentry, mnt, "xattr set", ++ MAY_WRITE, file); ++ return error; ++} ++ ++static int apparmor_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name, struct file *file) ++{ ++ return aa_xattr_permission(dentry, mnt, "xattr get", MAY_READ, file); ++} ++ ++static int apparmor_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct file *file) ++{ ++ return aa_xattr_permission(dentry, mnt, "xattr list", MAY_READ, file); ++} ++ ++static int apparmor_inode_removexattr(struct dentry *dentry, ++ struct vfsmount *mnt, const char *name, ++ struct file *file) ++{ ++ return aa_xattr_permission(dentry, mnt, "xattr remove", MAY_WRITE, ++ file); ++} ++ ++static int aa_file_permission(const char *op, struct file *file, int mask) ++{ ++ struct aa_profile *profile; ++ struct aa_profile *file_profile = (struct aa_profile*)file->f_security; ++ int error = 0; ++ ++ if (!file_profile) ++ goto out; ++ ++ /* ++ * If this file was opened under a different profile, we ++ * revalidate the access against the current profile. ++ */ ++ profile = aa_get_profile(current); ++ if (profile && (file_profile != profile || mask & AA_MAY_LOCK)) { ++ struct dentry *dentry = file->f_dentry; ++ struct vfsmount *mnt = file->f_vfsmnt; ++ struct inode *inode = dentry->d_inode; ++ int check = AA_CHECK_FD; ++ ++ /* ++ * FIXME: We should remember which profiles we revalidated ++ * against. 
++ */ ++ if (S_ISDIR(inode->i_mode)) ++ check |= AA_CHECK_DIR; ++ error = aa_permission(op, inode, dentry, mnt, mask, check); ++ } ++ aa_put_profile(profile); ++ ++out: ++ return error; ++} ++ ++static int apparmor_file_permission(struct file *file, int mask) ++{ ++ return aa_file_permission("file_permission", file, ++ aa_mask_permissions(mask)); ++} ++ ++static inline int apparmor_file_lock (struct file *file, unsigned int cmd) ++{ ++ int mask = AA_MAY_LOCK; ++ if (cmd == F_WRLCK) ++ mask |= MAY_WRITE; ++ return aa_file_permission("file_lock", file, mask); ++} ++ ++static int apparmor_file_alloc_security(struct file *file) ++{ ++ struct aa_profile *profile; ++ ++ profile = aa_get_profile(current); ++ if (profile) ++ file->f_security = profile; ++ ++ return 0; ++} ++ ++static void apparmor_file_free_security(struct file *file) ++{ ++ struct aa_profile *file_profile = (struct aa_profile*)file->f_security; ++ ++ aa_put_profile(file_profile); ++} ++ ++static inline int aa_mmap(struct file *file, const char *operation, ++ unsigned long prot, unsigned long flags) ++{ ++ struct dentry *dentry; ++ int mask = 0; ++ ++ if (!file || !file->f_security) ++ return 0; ++ ++ if (prot & PROT_READ) ++ mask |= MAY_READ; ++ /* Private mappings don't require write perms since they don't ++ * write back to the files */ ++ if ((prot & PROT_WRITE) && !(flags & MAP_PRIVATE)) ++ mask |= MAY_WRITE; ++ if (prot & PROT_EXEC) ++ mask |= AA_EXEC_MMAP; ++ ++ dentry = file->f_dentry; ++ return aa_permission(operation, dentry->d_inode, dentry, ++ file->f_vfsmnt, mask, AA_CHECK_FD); ++} ++ ++static int apparmor_file_mmap(struct file *file, unsigned long reqprot, ++ unsigned long prot, unsigned long flags, ++ unsigned long addr, unsigned long addr_only) ++{ ++ if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) { ++ struct aa_profile *profile = aa_get_profile(current); ++ if (profile) ++ /* future control check here */ ++ return -EACCES; ++ else ++ return -EACCES; ++ aa_put_profile(profile); ++ } 
++ ++ return aa_mmap(file, "file_mmap", prot, flags); ++} ++ ++static int apparmor_file_mprotect(struct vm_area_struct *vma, ++ unsigned long reqprot, unsigned long prot) ++{ ++ return aa_mmap(vma->vm_file, "file_mprotect", prot, ++ !(vma->vm_flags & VM_SHARED) ? MAP_PRIVATE : 0); ++} ++ ++static int apparmor_task_alloc_security(struct task_struct *task) ++{ ++ return aa_clone(task); ++} ++ ++/* ++ * Called from IRQ context from RCU callback. ++ */ ++static void apparmor_task_free_security(struct task_struct *task) ++{ ++ aa_release(task); ++} ++ ++static int apparmor_getprocattr(struct task_struct *task, char *name, ++ char **value) ++{ ++ unsigned len; ++ int error; ++ struct aa_profile *profile; ++ ++ /* AppArmor only supports the "current" process attribute */ ++ if (strcmp(name, "current") != 0) ++ return -EINVAL; ++ ++ /* must be task querying itself or admin */ ++ if (current != task && !capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ profile = aa_get_profile(task); ++ error = aa_getprocattr(profile, value, &len); ++ aa_put_profile(profile); ++ if (!error) ++ error = len; ++ ++ return error; ++} ++ ++static int apparmor_setprocattr(struct task_struct *task, char *name, ++ void *value, size_t size) ++{ ++ char *command, *args; ++ int error; ++ ++ if (strcmp(name, "current") != 0 || size == 0 || size >= PAGE_SIZE) ++ return -EINVAL; ++ args = value; ++ args[size] = '\0'; ++ args = strstrip(args); ++ command = strsep(&args, " "); ++ if (!args) ++ return -EINVAL; ++ while (isspace(*args)) ++ args++; ++ if (!*args) ++ return -EINVAL; ++ ++ if (strcmp(command, "changehat") == 0) { ++ if (current != task) ++ return -EACCES; ++ error = aa_setprocattr_changehat(args); ++ } else if (strcmp(command, "changeprofile") == 0) { ++ if (current != task) ++ return -EACCES; ++ error = aa_setprocattr_changeprofile(args); ++ } else if (strcmp(command, "setprofile") == 0) { ++ struct aa_profile *profile; ++ ++ /* Only an unconfined process with admin capabilities ++ * may change 
the profile of another task. ++ */ ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ profile = aa_get_profile(current); ++ if (profile) { ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "profile_set"; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.task = task->pid; ++ sa.info = "from confined process"; ++ aa_audit_reject(profile, &sa); ++ aa_put_profile(profile); ++ return -EACCES; ++ } ++ error = aa_setprocattr_setprofile(task, args); ++ } else { ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "setprocattr"; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.info = "invalid command"; ++ sa.name = command; ++ sa.task = task->pid; ++ aa_audit_reject(NULL, &sa); ++ return -EINVAL; ++ } ++ ++ if (!error) ++ error = size; ++ return error; ++} ++ ++struct security_operations apparmor_ops = { ++ .ptrace = apparmor_ptrace, ++ .capget = cap_capget, ++ .capset_check = cap_capset_check, ++ .capset_set = cap_capset_set, ++ .sysctl = apparmor_sysctl, ++ .capable = apparmor_capable, ++ .syslog = cap_syslog, ++ ++ .netlink_send = cap_netlink_send, ++ .netlink_recv = cap_netlink_recv, ++ ++ .bprm_apply_creds = cap_bprm_apply_creds, ++ .bprm_set_security = apparmor_bprm_set_security, ++ .bprm_secureexec = apparmor_bprm_secureexec, ++ ++ .sb_mount = apparmor_sb_mount, ++ .sb_umount = apparmor_umount, ++ ++ .inode_mkdir = apparmor_inode_mkdir, ++ .inode_rmdir = apparmor_inode_rmdir, ++ .inode_create = apparmor_inode_create, ++ .inode_link = apparmor_inode_link, ++ .inode_unlink = apparmor_inode_unlink, ++ .inode_symlink = apparmor_inode_symlink, ++ .inode_mknod = apparmor_inode_mknod, ++ .inode_rename = apparmor_inode_rename, ++ .inode_permission = apparmor_inode_permission, ++ .inode_setattr = apparmor_inode_setattr, ++ .inode_setxattr = apparmor_inode_setxattr, ++ .inode_getxattr = apparmor_inode_getxattr, ++ .inode_listxattr = apparmor_inode_listxattr, ++ .inode_removexattr = apparmor_inode_removexattr, ++ .file_permission = apparmor_file_permission, 
++ .file_alloc_security = apparmor_file_alloc_security, ++ .file_free_security = apparmor_file_free_security, ++ .file_mmap = apparmor_file_mmap, ++ .file_mprotect = apparmor_file_mprotect, ++ .file_lock = apparmor_file_lock, ++ ++ .task_alloc_security = apparmor_task_alloc_security, ++ .task_free_security = apparmor_task_free_security, ++ .task_post_setuid = cap_task_post_setuid, ++ .task_reparent_to_init = cap_task_reparent_to_init, ++ ++ .getprocattr = apparmor_getprocattr, ++ .setprocattr = apparmor_setprocattr, ++}; ++ ++void info_message(const char *str) ++{ ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.gfp_mask = GFP_KERNEL; ++ sa.info = str; ++ printk(KERN_INFO "AppArmor: %s\n", str); ++ if (audit_enabled) ++ aa_audit_message(NULL, &sa, AUDIT_APPARMOR_STATUS); ++} ++ ++static int __init apparmor_init(void) ++{ ++ int error; ++ ++ if (!apparmor_enabled) { ++ info_message("AppArmor disabled by boottime parameter\n"); ++ return 0; ++ } ++ ++ if ((error = create_apparmorfs())) { ++ AA_ERROR("Unable to activate AppArmor filesystem\n"); ++ goto createfs_out; ++ } ++ ++ if ((error = alloc_default_namespace())){ ++ AA_ERROR("Unable to allocate default profile namespace\n"); ++ goto alloc_out; ++ } ++ ++ if ((error = register_security(&apparmor_ops))) { ++ AA_ERROR("Unable to register AppArmor\n"); ++ goto register_security_out; ++ } ++ ++ /* Report that AppArmor successfully initialized */ ++ apparmor_initialized = 1; ++ if (apparmor_complain) ++ info_message("AppArmor initialized: complainmode enabled"); ++ else ++ info_message("AppArmor initialized"); ++ ++ return error; ++ ++register_security_out: ++ free_default_namespace(); ++ ++alloc_out: ++ destroy_apparmorfs(); ++ ++createfs_out: ++ return error; ++ ++} ++ ++security_initcall(apparmor_init); ++ ++void apparmor_disable(void) ++{ ++ /* Remove and release all the profiles on the profile list. 
*/ ++ mutex_lock(&aa_interface_lock); ++ aa_profile_ns_list_release(); ++ ++ /* FIXME: cleanup profiles references on files */ ++ free_default_namespace(); ++ ++ /* ++ * Delay for an rcu cycle to make sure that all active task ++ * context readers have finished, and all profiles have been ++ * freed by their rcu callbacks. ++ */ ++ synchronize_rcu(); ++ ++ destroy_apparmorfs(); ++ mutex_unlock(&aa_interface_lock); ++ ++ apparmor_initialized = 0; ++ ++ info_message("AppArmor protection removed"); ++} ++ ++MODULE_DESCRIPTION("AppArmor process confinement"); ++MODULE_AUTHOR("Novell/Immunix, http://bugs.opensuse.org"); ++MODULE_LICENSE("GPL"); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-main.diff b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-main.diff new file mode 100644 index 0000000000..04734fdefa --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-main.diff @@ -0,0 +1,1493 @@ +From: John Johansen +Subject: AppArmor: Main Part + +The underlying functions by which the AppArmor LSM hooks are implemented. + +Signed-off-by: John Johansen +Signed-off-by: Andreas Gruenbacher + +--- + security/apparmor/main.c | 1478 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 1478 insertions(+) + +--- /dev/null ++++ b/security/apparmor/main.c +@@ -0,0 +1,1478 @@ ++/* ++ * Copyright (C) 2002-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. ++ * ++ * AppArmor Core ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "apparmor.h" ++ ++#include "inline.h" ++ ++/* ++ * Table of capability names: we generate it from capabilities.h. 
++ */ ++static const char *capability_names[] = { ++#include "capability_names.h" ++}; ++ ++struct aa_namespace *default_namespace; ++ ++static int aa_inode_mode(struct inode *inode) ++{ ++ /* if the inode doesn't exist the user is creating it */ ++ if (!inode || current->fsuid == inode->i_uid) ++ return AA_USER_SHIFT; ++ return AA_OTHER_SHIFT; ++} ++ ++int alloc_default_namespace(void) ++{ ++ struct aa_namespace *ns; ++ char *name = kstrdup("default", GFP_KERNEL); ++ if (!name) ++ return -ENOMEM; ++ ns = alloc_aa_namespace(name); ++ if (!ns) { ++ kfree(name); ++ return -ENOMEM; ++ } ++ ++ write_lock(&profile_ns_list_lock); ++ default_namespace = ns; ++ aa_get_namespace(ns); ++ list_add(&ns->list, &profile_ns_list); ++ write_unlock(&profile_ns_list_lock); ++ ++ return 0; ++} ++ ++void free_default_namespace(void) ++{ ++ write_lock(&profile_ns_list_lock); ++ list_del_init(&default_namespace->list); ++ write_unlock(&profile_ns_list_lock); ++ aa_put_namespace(default_namespace); ++ default_namespace = NULL; ++} ++ ++static void aa_audit_file_sub_mask(struct audit_buffer *ab, char *buffer, ++ int mask) ++{ ++ const char unsafex[] = "upcn"; ++ const char safex[] = "UPCN"; ++ char *m = buffer; ++ ++ if (mask & AA_EXEC_MMAP) ++ *m++ = 'm'; ++ if (mask & MAY_READ) ++ *m++ = 'r'; ++ if (mask & MAY_WRITE) ++ *m++ = 'w'; ++ else if (mask & MAY_APPEND) ++ *m++ = 'a'; ++ if (mask & MAY_EXEC) { ++ int index = AA_EXEC_INDEX(mask); ++ /* all indexes > 4 are also named transitions */ ++ if (index > 4) ++ index = 4; ++ if (index > 0) { ++ if (mask & AA_EXEC_UNSAFE) ++ *m++ = unsafex[index - 1]; ++ else ++ *m++ = safex[index - 1]; ++ } ++ if (mask & AA_EXEC_INHERIT) ++ *m++ = 'i'; ++ *m++ = 'x'; ++ } ++ if (mask & AA_MAY_LINK) ++ *m++ = 'l'; ++ if (mask & AA_MAY_LOCK) ++ *m++ = 'k'; ++ *m++ = '\0'; ++} ++ ++static void aa_audit_file_mask(struct audit_buffer *ab, const char *name, ++ int mask) ++{ ++ char user[10], other[10]; ++ ++ aa_audit_file_sub_mask(ab, user, ++ (mask & 
AA_USER_PERMS) >> AA_USER_SHIFT); ++ aa_audit_file_sub_mask(ab, other, ++ (mask & AA_OTHER_PERMS) >> AA_OTHER_SHIFT); ++ ++ audit_log_format(ab, " %s=\"%s::%s\"", name, user, other); ++} ++ ++/** ++ * aa_audit - Log an audit event to the audit subsystem ++ * @profile: profile to check against ++ * @sa: audit event ++ * @audit_cxt: audit context to log message to ++ * @type: audit event number ++ */ ++static int aa_audit_base(struct aa_profile *profile, struct aa_audit *sa, ++ struct audit_context *audit_cxt, int type) ++{ ++ struct audit_buffer *ab = NULL; ++ ++ ab = audit_log_start(audit_cxt, sa->gfp_mask, type); ++ ++ if (!ab) { ++ AA_ERROR("Unable to log event (%d) to audit subsys\n", ++ type); ++ /* don't fail operations in complain mode even if logging ++ * fails */ ++ return type == AUDIT_APPARMOR_ALLOWED ? 0 : -ENOMEM; ++ } ++ ++ if (sa->operation) ++ audit_log_format(ab, "operation=\"%s\"", sa->operation); ++ ++ if (sa->info) { ++ audit_log_format(ab, " info=\"%s\"", sa->info); ++ if (sa->error_code) ++ audit_log_format(ab, " error=%d", sa->error_code); ++ } ++ ++ if (sa->request_mask) ++ aa_audit_file_mask(ab, "requested_mask", sa->request_mask); ++ ++ if (sa->denied_mask) ++ aa_audit_file_mask(ab, "denied_mask", sa->denied_mask); ++ ++ if (sa->request_mask) ++ audit_log_format(ab, " fsuid=%d", current->fsuid); ++ ++ if (sa->iattr) { ++ struct iattr *iattr = sa->iattr; ++ ++ audit_log_format(ab, " attribute=\"%s%s%s%s%s%s%s\"", ++ iattr->ia_valid & ATTR_MODE ? "mode," : "", ++ iattr->ia_valid & ATTR_UID ? "uid," : "", ++ iattr->ia_valid & ATTR_GID ? "gid," : "", ++ iattr->ia_valid & ATTR_SIZE ? "size," : "", ++ iattr->ia_valid & (ATTR_ATIME | ATTR_ATIME_SET) ? ++ "atime," : "", ++ iattr->ia_valid & (ATTR_MTIME | ATTR_MTIME_SET) ? ++ "mtime," : "", ++ iattr->ia_valid & ATTR_CTIME ? 
"ctime," : ""); ++ } ++ ++ if (sa->task) ++ audit_log_format(ab, " task=%d", sa->task); ++ ++ if (sa->parent) ++ audit_log_format(ab, " parent=%d", sa->parent); ++ ++ if (sa->name) { ++ audit_log_format(ab, " name="); ++ audit_log_untrustedstring(ab, sa->name); ++ } ++ ++ if (sa->name2) { ++ audit_log_format(ab, " name2="); ++ audit_log_untrustedstring(ab, sa->name2); ++ } ++ ++ audit_log_format(ab, " pid=%d", current->pid); ++ ++ if (profile) { ++ audit_log_format(ab, " profile="); ++ audit_log_untrustedstring(ab, profile->name); ++ ++ if (profile->ns != default_namespace) { ++ audit_log_format(ab, " namespace="); ++ audit_log_untrustedstring(ab, profile->ns->name); ++ } ++ } ++ ++ audit_log_end(ab); ++ ++ return type == AUDIT_APPARMOR_ALLOWED ? 0 : sa->error_code; ++} ++ ++/** ++ * aa_audit_syscallreject - Log a syscall rejection to the audit subsystem ++ * @profile: profile to check against ++ * @gfp: memory allocation flags ++ * @msg: string describing syscall being rejected ++ */ ++int aa_audit_syscallreject(struct aa_profile *profile, gfp_t gfp, ++ const char *msg) ++{ ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "syscall"; ++ sa.name = msg; ++ sa.gfp_mask = gfp; ++ sa.error_code = -EPERM; ++ ++ return aa_audit_base(profile, &sa, current->audit_context, ++ AUDIT_APPARMOR_DENIED); ++} ++ ++int aa_audit_message(struct aa_profile *profile, struct aa_audit *sa, ++ int type) ++{ ++ struct audit_context *audit_cxt; ++ ++ audit_cxt = apparmor_logsyscall ? 
current->audit_context : NULL; ++ return aa_audit_base(profile, sa, audit_cxt, type); ++} ++ ++void aa_audit_hint(struct aa_profile *profile, struct aa_audit *sa) ++{ ++ aa_audit_message(profile, sa, AUDIT_APPARMOR_HINT); ++} ++ ++void aa_audit_status(struct aa_profile *profile, struct aa_audit *sa) ++{ ++ aa_audit_message(profile, sa, AUDIT_APPARMOR_STATUS); ++} ++ ++int aa_audit_reject(struct aa_profile *profile, struct aa_audit *sa) ++{ ++ return aa_audit_message(profile, sa, AUDIT_APPARMOR_DENIED); ++} ++ ++/** ++ * aa_audit - Log an audit event to the audit subsystem ++ * @profile: profile to check against ++ * @sa: audit event ++ */ ++int aa_audit(struct aa_profile *profile, struct aa_audit *sa) ++{ ++ int type = AUDIT_APPARMOR_DENIED; ++ struct audit_context *audit_cxt; ++ ++ if (likely(!sa->error_code)) ++ type = AUDIT_APPARMOR_AUDIT; ++ else if (PROFILE_COMPLAIN(profile)) ++ type = AUDIT_APPARMOR_ALLOWED; ++ ++ audit_cxt = apparmor_logsyscall ? current->audit_context : NULL; ++ return aa_audit_base(profile, sa, audit_cxt, type); ++} ++ ++static int aa_audit_file(struct aa_profile *profile, struct aa_audit *sa) ++{ ++ if (likely(!sa->error_code)) { ++ int mask = sa->audit_mask & AUDIT_FILE_MASK; ++ ++ if (unlikely(PROFILE_AUDIT(profile))) ++ mask |= AUDIT_FILE_MASK; ++ ++ if (likely(!(sa->request_mask & mask))) ++ return 0; ++ ++ /* mask off perms that are not being force audited */ ++ sa->request_mask &= mask | ALL_AA_EXEC_TYPE; ++ } else { ++ int mask = AUDIT_QUIET_MASK(sa->audit_mask); ++ ++ if (!(sa->denied_mask & ~mask)) ++ return sa->error_code; ++ ++ /* mask off perms whose denial is being silenced */ ++ sa->denied_mask &= (~mask) | ALL_AA_EXEC_TYPE; ++ } ++ ++ return aa_audit(profile, sa); ++} ++ ++static int aa_audit_caps(struct aa_profile *profile, struct aa_audit *sa, ++ int cap) ++{ ++ if (likely(!sa->error_code)) { ++ if (likely(!PROFILE_AUDIT(profile) && ++ !cap_raised(profile->audit_caps, cap))) ++ return 0; ++ } ++ ++ /* quieting of 
capabilities is handled the caps_logged cache */ ++ return aa_audit(profile, sa); ++} ++ ++/** ++ * aa_file_denied - check for @mask access on a file ++ * @profile: profile to check against ++ * @name: pathname of file ++ * @mask: permission mask requested for file ++ * @audit_mask: return audit mask for the match ++ * ++ * Return %0 on success, or else the permissions in @mask that the ++ * profile denies. ++ */ ++static int aa_file_denied(struct aa_profile *profile, const char *name, ++ int mask, int *audit_mask) ++{ ++ return (mask & ~aa_match(profile->file_rules, name, audit_mask)); ++} ++ ++/** ++ * aa_link_denied - check for permission to link a file ++ * @profile: profile to check against ++ * @link: pathname of link being created ++ * @target: pathname of target to be linked to ++ * @target_mode: UGO shift for target inode ++ * @request_mask: the permissions subset valid only if link succeeds ++ * @audit_mask: return the audit_mask for the link permission ++ * Return %0 on success, or else the permissions that the profile denies. 
++ */ ++static int aa_link_denied(struct aa_profile *profile, const char *link, ++ const char *target, int target_mode, ++ int *request_mask, int *audit_mask) ++{ ++ unsigned int state; ++ int l_mode, t_mode, l_x, t_x, denied_mask = 0; ++ int link_mask = AA_MAY_LINK << target_mode; ++ ++ *request_mask = link_mask; ++ ++ l_mode = aa_match_state(profile->file_rules, DFA_START, link, &state); ++ ++ if (l_mode & link_mask) { ++ int mode; ++ /* test to see if target can be paired with link */ ++ state = aa_dfa_null_transition(profile->file_rules, state); ++ mode = aa_match_state(profile->file_rules, state, target, ++ &state); ++ ++ if (!(mode & link_mask)) ++ denied_mask |= link_mask; ++ ++ *audit_mask = dfa_audit_mask(profile->file_rules, state); ++ ++ /* return if link subset test is not required */ ++ if (!(mode & (AA_LINK_SUBSET_TEST << target_mode))) ++ return denied_mask; ++ } ++ ++ /* Do link perm subset test requiring permission on link are a ++ * subset of the permissions on target. ++ * If a subset test is required a permission subset test of the ++ * perms for the link are done against the user::other of the ++ * target's 'r', 'w', 'x', 'a', 'k', and 'm' permissions. ++ * ++ * If the link has 'x', an exact match of all the execute flags ++ * must match. 
++ */ ++ denied_mask |= ~l_mode & link_mask; ++ ++ t_mode = aa_match(profile->file_rules, target, NULL); ++ ++ l_x = l_mode & (ALL_AA_EXEC_TYPE | AA_EXEC_BITS); ++ t_x = t_mode & (ALL_AA_EXEC_TYPE | AA_EXEC_BITS); ++ ++ /* For actual subset test ignore valid-profile-transition flags, ++ * and link bits ++ */ ++ l_mode &= AA_FILE_PERMS & ~AA_LINK_BITS; ++ t_mode &= AA_FILE_PERMS & ~AA_LINK_BITS; ++ ++ *request_mask = l_mode | link_mask; ++ ++ if (l_mode) { ++ int x = l_x | (t_x & ALL_AA_EXEC_UNSAFE); ++ denied_mask |= l_mode & ~t_mode; ++ /* mask off x modes not used by link */ ++ ++ /* handle exec subset ++ * - link safe exec issubset of unsafe exec ++ * - no link x perm is subset of target having x perm ++ */ ++ if ((l_mode & AA_USER_EXEC) && ++ (x & AA_USER_EXEC_TYPE) != (t_x & AA_USER_EXEC_TYPE)) ++ denied_mask = AA_USER_EXEC | (l_x & AA_USER_EXEC_TYPE); ++ if ((l_mode & AA_OTHER_EXEC) && ++ (x & AA_OTHER_EXEC_TYPE) != (t_x & AA_OTHER_EXEC_TYPE)) ++ denied_mask = AA_OTHER_EXEC | (l_x & AA_OTHER_EXEC_TYPE); ++ } ++ ++ return denied_mask; ++} ++ ++/** ++ * aa_get_name - compute the pathname of a file ++ * @dentry: dentry of the file ++ * @mnt: vfsmount of the file ++ * @buffer: buffer that aa_get_name() allocated ++ * @check: AA_CHECK_DIR is set if the file is a directory ++ * ++ * Returns a pointer to the beginning of the pathname (which usually differs ++ * from the beginning of the buffer), or an error code. ++ * ++ * We need @check to indicate whether the file is a directory or not because ++ * the file may not yet exist, and so we cannot check the inode's file type. ++ */ ++static char *aa_get_name(struct dentry *dentry, struct vfsmount *mnt, ++ char **buffer, int check) ++{ ++ char *name; ++ int is_dir, size = 256; ++ ++ is_dir = (check & AA_CHECK_DIR) ? 
1 : 0; ++ ++ for (;;) { ++ char *buf = kmalloc(size, GFP_KERNEL); ++ if (!buf) ++ return ERR_PTR(-ENOMEM); ++ ++ name = d_namespace_path(dentry, mnt, buf, size - is_dir); ++ if (!IS_ERR(name)) { ++ if (name[0] != '/') { ++ /* ++ * This dentry is not connected to the ++ * namespace root -- reject access. ++ */ ++ kfree(buf); ++ return ERR_PTR(-ENOENT); ++ } ++ if (is_dir && name[1] != '\0') { ++ /* ++ * Append "/" to the pathname. The root ++ * directory is a special case; it already ++ * ends in slash. ++ */ ++ buf[size - 2] = '/'; ++ buf[size - 1] = '\0'; ++ } ++ ++ *buffer = buf; ++ return name; ++ } ++ if (PTR_ERR(name) != -ENAMETOOLONG) ++ return name; ++ ++ kfree(buf); ++ size <<= 1; ++ if (size > apparmor_path_max) ++ return ERR_PTR(-ENAMETOOLONG); ++ } ++} ++ ++static char *new_compound_name(const char *n1, const char *n2) ++{ ++ char *name = kmalloc(strlen(n1) + strlen(n2) + 3, GFP_KERNEL); ++ if (name) ++ sprintf(name, "%s//%s", n1, n2); ++ return name; ++} ++static inline void aa_put_name_buffer(char *buffer) ++{ ++ kfree(buffer); ++} ++ ++/** ++ * aa_perm_dentry - check if @profile allows @mask for a file ++ * @profile: profile to check against ++ * @dentry: dentry of the file ++ * @mnt: vfsmount o the file ++ * @sa: audit context ++ * @mask: requested profile permissions ++ * @check: kind of check to perform ++ * ++ * Returns 0 upon success, or else an error code. ++ * ++ * @check indicates the file type, and whether the file was accessed through ++ * an open file descriptor (AA_CHECK_FD) or not. ++ */ ++static int aa_perm_dentry(struct aa_profile *profile, struct dentry *dentry, ++ struct vfsmount *mnt, struct aa_audit *sa, int check) ++{ ++ int error; ++ char *buffer = NULL; ++ ++ sa->name = aa_get_name(dentry, mnt, &buffer, check); ++ sa->request_mask <<= aa_inode_mode(dentry->d_inode); ++ if (IS_ERR(sa->name)) { ++ /* ++ * deleted files are given a pass on permission checks when ++ * accessed through a file descriptor. 
++ */ ++ if (PTR_ERR(sa->name) == -ENOENT && (check & AA_CHECK_FD)) ++ sa->denied_mask = 0; ++ else { ++ sa->denied_mask = sa->request_mask; ++ sa->error_code = PTR_ERR(sa->name); ++ if (sa->error_code == -ENOENT) ++ sa->info = "Failed name resolution - object not a valid entry"; ++ else if (sa->error_code == -ENAMETOOLONG) ++ sa->info = "Failed name resolution - name too long"; ++ else ++ sa->info = "Failed name resolution"; ++ } ++ sa->name = NULL; ++ } else ++ sa->denied_mask = aa_file_denied(profile, sa->name, ++ sa->request_mask, ++ &sa->audit_mask); ++ ++ if (!sa->denied_mask) ++ sa->error_code = 0; ++ ++ error = aa_audit_file(profile, sa); ++ aa_put_name_buffer(buffer); ++ ++ return error; ++} ++ ++/** ++ * aa_attr - check if attribute change is allowed ++ * @profile: profile to check against ++ * @dentry: dentry of the file to check ++ * @mnt: vfsmount of the file to check ++ * @iattr: attribute changes requested ++ */ ++int aa_attr(struct aa_profile *profile, struct dentry *dentry, ++ struct vfsmount *mnt, struct iattr *iattr) ++{ ++ struct inode *inode = dentry->d_inode; ++ int error, check; ++ struct aa_audit sa; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "setattr"; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.iattr = iattr; ++ sa.request_mask = MAY_WRITE; ++ sa.error_code = -EACCES; ++ ++ check = 0; ++ if (inode && S_ISDIR(inode->i_mode)) ++ check |= AA_CHECK_DIR; ++ if (iattr->ia_valid & ATTR_FILE) ++ check |= AA_CHECK_FD; ++ ++ error = aa_perm_dentry(profile, dentry, mnt, &sa, check); ++ ++ return error; ++} ++ ++/** ++ * aa_perm_xattr - check if xattr attribute change is allowed ++ * @profile: profile to check against ++ * @dentry: dentry of the file to check ++ * @mnt: vfsmount of the file to check ++ * @operation: xattr operation being done ++ * @mask: access mode requested ++ * @check: kind of check to perform ++ */ ++int aa_perm_xattr(struct aa_profile *profile, const char *operation, ++ struct dentry *dentry, struct vfsmount *mnt, int mask, ++ 
int check) ++{ ++ struct inode *inode = dentry->d_inode; ++ int error; ++ struct aa_audit sa; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = operation; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.request_mask = mask; ++ sa.error_code = -EACCES; ++ ++ if (inode && S_ISDIR(inode->i_mode)) ++ check |= AA_CHECK_DIR; ++ ++ error = aa_perm_dentry(profile, dentry, mnt, &sa, check); ++ ++ return error; ++} ++ ++/** ++ * aa_perm - basic apparmor permissions check ++ * @profile: profile to check against ++ * @dentry: dentry of the file to check ++ * @mnt: vfsmount of the file to check ++ * @mask: access mode requested ++ * @check: kind of check to perform ++ * ++ * Determine if access @mask for the file is authorized by @profile. ++ * Returns 0 on success, or else an error code. ++ */ ++int aa_perm(struct aa_profile *profile, const char *operation, ++ struct dentry *dentry, struct vfsmount *mnt, int mask, int check) ++{ ++ struct aa_audit sa; ++ int error = 0; ++ ++ if (mask == 0) ++ goto out; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = operation; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.request_mask = mask; ++ sa.error_code = -EACCES; ++ ++ error = aa_perm_dentry(profile, dentry, mnt, &sa, check); ++ ++out: ++ return error; ++} ++ ++/** ++ * aa_perm_dir ++ * @profile: profile to check against ++ * @dentry: dentry of directory to check ++ * @mnt: vfsmount of directory to check ++ * @operation: directory operation being performed ++ * @mask: access mode requested ++ * ++ * Determine if directory operation (make/remove) for dentry is authorized ++ * by @profile. ++ * Returns 0 on success, or else an error code. 
++ */ ++int aa_perm_dir(struct aa_profile *profile, const char *operation, ++ struct dentry *dentry, struct vfsmount *mnt, int mask) ++{ ++ struct aa_audit sa; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = operation; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.request_mask = mask; ++ sa.error_code = -EACCES; ++ ++ return aa_perm_dentry(profile, dentry, mnt, &sa, AA_CHECK_DIR); ++} ++ ++int aa_perm_path(struct aa_profile *profile, const char *operation, ++ const char *name, int mask, uid_t uid) ++{ ++ struct aa_audit sa; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = operation; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.request_mask = mask; ++ sa.name = name; ++ if (current->fsuid == uid) ++ sa.request_mask = mask << AA_USER_SHIFT; ++ else ++ sa.request_mask = mask << AA_OTHER_SHIFT; ++ ++ sa.denied_mask = aa_file_denied(profile, name, sa.request_mask, ++ &sa.audit_mask) ; ++ sa.error_code = sa.denied_mask ? -EACCES : 0; ++ ++ return aa_audit_file(profile, &sa); ++} ++ ++/** ++ * aa_capability - test permission to use capability ++ * @cxt: aa_task_context with profile to check against ++ * @cap: capability to be tested ++ * ++ * Look up capability in profile capability set. ++ * Returns 0 on success, or else an error code. ++ */ ++int aa_capability(struct aa_task_context *cxt, int cap) ++{ ++ int error = cap_raised(cxt->profile->capabilities, cap) ? 0 : -EPERM; ++ struct aa_audit sa; ++ ++ /* test if cap has alread been logged */ ++ if (cap_raised(cxt->caps_logged, cap)) { ++ if (PROFILE_COMPLAIN(cxt->profile)) ++ error = 0; ++ return error; ++ } else ++ /* don't worry about rcu replacement of the cxt here. ++ * caps_logged is a cache to reduce the occurence of ++ * duplicate messages in the log. 
The worst that can ++ * happen is duplicate capability messages shows up in ++ * the audit log ++ */ ++ cap_raise(cxt->caps_logged, cap); ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "capable"; ++ sa.gfp_mask = GFP_ATOMIC; ++ sa.name = capability_names[cap]; ++ sa.error_code = error; ++ ++ error = aa_audit_caps(cxt->profile, &sa, cap); ++ ++ return error; ++} ++ ++/* must be used inside rcu_read_lock or task_lock */ ++int aa_may_ptrace(struct aa_task_context *cxt, struct aa_profile *tracee) ++{ ++ if (!cxt || cxt->profile == tracee) ++ return 0; ++ return aa_capability(cxt, CAP_SYS_PTRACE); ++} ++ ++/** ++ * aa_link - hard link check ++ * @profile: profile to check against ++ * @link: dentry of link being created ++ * @link_mnt: vfsmount of link being created ++ * @target: dentry of link target ++ * @target_mnt: vfsmunt of link target ++ * ++ * Returns 0 on success, or else an error code. ++ */ ++int aa_link(struct aa_profile *profile, ++ struct dentry *link, struct vfsmount *link_mnt, ++ struct dentry *target, struct vfsmount *target_mnt) ++{ ++ int error; ++ struct aa_audit sa; ++ char *buffer = NULL, *buffer2 = NULL; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "inode_link"; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.name = aa_get_name(link, link_mnt, &buffer, 0); ++ sa.name2 = aa_get_name(target, target_mnt, &buffer2, 0); ++ ++ if (IS_ERR(sa.name)) { ++ sa.error_code = PTR_ERR(sa.name); ++ sa.name = NULL; ++ } ++ if (IS_ERR(sa.name2)) { ++ sa.error_code = PTR_ERR(sa.name2); ++ sa.name2 = NULL; ++ } ++ ++ if (sa.name && sa.name2) { ++ sa.denied_mask = aa_link_denied(profile, sa.name, sa.name2, ++ aa_inode_mode(target->d_inode), ++ &sa.request_mask, ++ &sa.audit_mask); ++ sa.error_code = sa.denied_mask ? 
-EACCES : 0; ++ } ++ ++ error = aa_audit_file(profile, &sa); ++ ++ aa_put_name_buffer(buffer); ++ aa_put_name_buffer(buffer2); ++ ++ return error; ++} ++ ++/******************************* ++ * Global task related functions ++ *******************************/ ++ ++/** ++ * aa_clone - initialize the task context for a new task ++ * @child: task that is being created ++ * ++ * Returns 0 on success, or else an error code. ++ */ ++int aa_clone(struct task_struct *child) ++{ ++ struct aa_task_context *cxt, *child_cxt; ++ struct aa_profile *profile; ++ ++ if (!aa_task_context(current)) ++ return 0; ++ child_cxt = aa_alloc_task_context(GFP_KERNEL); ++ if (!child_cxt) ++ return -ENOMEM; ++ ++repeat: ++ profile = aa_get_profile(current); ++ if (profile) { ++ lock_profile(profile); ++ cxt = aa_task_context(current); ++ if (unlikely(profile->isstale || !cxt || ++ cxt->profile != profile)) { ++ /** ++ * Race with profile replacement or removal, or with ++ * task context removal. ++ */ ++ unlock_profile(profile); ++ aa_put_profile(profile); ++ goto repeat; ++ } ++ ++ /* No need to grab the child's task lock here. 
*/ ++ aa_change_task_context(child, child_cxt, profile, ++ cxt->cookie, cxt->previous_profile); ++ unlock_profile(profile); ++ ++ if (APPARMOR_COMPLAIN(child_cxt) && ++ profile == profile->ns->null_complain_profile) { ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "clone"; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.task = child->pid; ++ aa_audit_hint(profile, &sa); ++ } ++ aa_put_profile(profile); ++ } else ++ aa_free_task_context(child_cxt); ++ ++ return 0; ++} ++ ++static struct aa_profile * ++aa_register_find(struct aa_profile *profile, const char* ns_name, ++ const char *name, int mandatory, int complain, ++ struct aa_audit *sa) ++{ ++ struct aa_namespace *ns; ++ struct aa_profile *new_profile; ++ int ns_ref = 0; ++ ++ if (profile) ++ ns = profile->ns; ++ else ++ ns = default_namespace; ++ ++ if (ns_name) { ++ /* locate the profile namespace */ ++ ns = aa_find_namespace(ns_name); ++ if (!ns) { ++ if (mandatory) { ++ sa->info = "profile namespace not found"; ++ sa->denied_mask = sa->request_mask; ++ sa->error_code = -ENOENT; ++ return ERR_PTR(-ENOENT); ++ } else { ++ return NULL; ++ } ++ } ++ ns_ref++; ++ } ++ ++ /* Locate new profile */ ++ new_profile = aa_find_profile(ns, name); ++ ++ if (new_profile) { ++ AA_DEBUG("%s: setting profile %s\n", ++ __FUNCTION__, new_profile->name); ++ } else if (mandatory && profile) { ++ sa->info = "mandatory profile missing"; ++ sa->denied_mask = sa->request_mask; /* shifted MAY_EXEC */ ++ if (complain) { ++ aa_audit_hint(profile, sa); ++ new_profile = ++ aa_dup_profile(profile->ns->null_complain_profile); ++ } else { ++ sa->error_code = -EACCES; ++ if (ns_ref) ++ aa_put_namespace(ns); ++ return ERR_PTR(-EACCES); ++ } ++ } else { ++ /* Only way we can get into this code is if task ++ * is unconfined, pix, nix. 
++ */ ++ AA_DEBUG("%s: No profile found for exec image '%s'\n", ++ __FUNCTION__, ++ name); ++ } ++ if (ns_ref) ++ aa_put_namespace(ns); ++ return new_profile; ++} ++ ++static struct aa_profile * ++aa_x_to_profile(struct aa_profile *profile, const char *filename, int xmode, ++ struct aa_audit *sa, char **child) ++{ ++ struct aa_profile *new_profile = NULL; ++ int ix = xmode & AA_EXEC_INHERIT; ++ int complain = PROFILE_COMPLAIN(profile); ++ int index; ++ ++ *child = NULL; ++ switch (xmode & AA_EXEC_MODIFIERS) { ++ case 0: ++ /* only valid with ix flag */ ++ ix = 1; ++ break; ++ case AA_EXEC_UNCONFINED: ++ /* only valid without ix flag */ ++ ix = 0; ++ break; ++ case AA_EXEC_PROFILE: ++ new_profile = aa_register_find(profile, NULL, filename, !ix, ++ complain, sa); ++ break; ++ case AA_EXEC_CHILD: ++ *child = new_compound_name(profile->name, filename); ++ sa->name2 = *child; ++ if (!*child) { ++ sa->info = "Failed name resolution - exec failed"; ++ sa->error_code = -ENOMEM; ++ new_profile = ERR_PTR(-ENOMEM); ++ } else { ++ new_profile = aa_register_find(profile, NULL, *child, ++ !ix, complain, sa); ++ } ++ break; ++ default: ++ /* all other indexes are named transitions */ ++ index = AA_EXEC_INDEX(xmode); ++ if (index - 4 > profile->exec_table_size) { ++ sa->info = "invalid named transition - exec failed"; ++ sa->error_code = -EACCES; ++ new_profile = ERR_PTR(-EACCES); ++ } else { ++ char *ns_name = NULL; ++ char *name = profile->exec_table[index - 4]; ++ if (*name == ':') { ++ ns_name = name + 1; ++ name = ns_name + strlen(ns_name) + 1; ++ } ++ sa->name2 = name; ++ sa->name3 = ns_name; ++ new_profile = ++ aa_register_find(profile, ns_name, name, ++ !ix, complain, sa); ++ } ++ } ++ if (IS_ERR(new_profile)) ++ /* all these failures must be audited - no quieting */ ++ return ERR_PTR(aa_audit_reject(profile, sa)); ++ return new_profile; ++} ++ ++/** ++ * aa_register - register a new program ++ * @bprm: binprm of program being registered ++ * ++ * Try to register a new 
program during execve(). This should give the ++ * new program a valid aa_task_context if confined. ++ */ ++int aa_register(struct linux_binprm *bprm) ++{ ++ const char *filename; ++ char *buffer = NULL, *child = NULL; ++ struct file *filp = bprm->file; ++ struct aa_profile *profile, *old_profile, *new_profile = NULL; ++ int exec_mode, complain = 0, shift; ++ struct aa_audit sa; ++ ++ AA_DEBUG("%s\n", __FUNCTION__); ++ ++ profile = aa_get_profile(current); ++ ++ shift = aa_inode_mode(filp->f_dentry->d_inode); ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "exec"; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.request_mask = MAY_EXEC << shift; ++ ++ filename = aa_get_name(filp->f_dentry, filp->f_vfsmnt, &buffer, 0); ++ if (IS_ERR(filename)) { ++ if (profile) { ++ sa.info = "Failed name resolution - exec failed"; ++ sa.error_code = PTR_ERR(filename); ++ aa_audit_file(profile, &sa); ++ return sa.error_code; ++ } else ++ return 0; ++ } ++ sa.name = filename; ++ ++ exec_mode = AA_EXEC_UNSAFE << shift; ++ ++repeat: ++ if (profile) { ++ complain = PROFILE_COMPLAIN(profile); ++ ++ /* Confined task, determine what mode inherit, unconfined or ++ * mandatory to load new profile ++ */ ++ exec_mode = aa_match(profile->file_rules, filename, ++ &sa.audit_mask); ++ ++ ++ if (exec_mode & sa.request_mask) { ++ int xm = exec_mode >> shift; ++ new_profile = aa_x_to_profile(profile, filename, ++ xm, &sa, &child); ++ ++ if (!new_profile && (xm & AA_EXEC_INHERIT)) ++ /* (p|c|n|)ix - don't change profile */ ++ goto cleanup; ++ /* error case caught below */ ++ ++ } else if (sa.request_mask & AUDIT_QUIET_MASK(sa.audit_mask)) { ++ /* quiet failed exit */ ++ new_profile = ERR_PTR(-EACCES); ++ } else if (complain) { ++ /* There was no entry in calling profile ++ * describing mode to execute image in. ++ * Drop into null-profile (disabling secure exec). 
++ */ ++ new_profile = ++ aa_dup_profile(profile->ns->null_complain_profile); ++ exec_mode |= AA_EXEC_UNSAFE << shift; ++ } else { ++ sa.denied_mask = sa.request_mask; ++ sa.error_code = -EACCES; ++ new_profile = ERR_PTR(aa_audit_file(profile, &sa)); ++ } ++ } else { ++ /* Unconfined task, load profile if it exists */ ++ new_profile = aa_register_find(NULL, NULL, filename, 0, 0, &sa); ++ if (new_profile == NULL) ++ goto cleanup; ++ } ++ ++ if (IS_ERR(new_profile)) ++ goto cleanup; ++ ++ old_profile = __aa_replace_profile(current, new_profile); ++ if (IS_ERR(old_profile)) { ++ aa_put_profile(new_profile); ++ aa_put_profile(profile); ++ if (PTR_ERR(old_profile) == -ESTALE) { ++ profile = aa_get_profile(current); ++ goto repeat; ++ } ++ if (PTR_ERR(old_profile) == -EPERM) { ++ sa.denied_mask = sa.request_mask; ++ sa.info = "unable to set profile due to ptrace"; ++ sa.task = current->parent->pid; ++ aa_audit_reject(profile, &sa); ++ } ++ new_profile = old_profile; ++ goto cleanup; ++ } ++ aa_put_profile(old_profile); ++ aa_put_profile(profile); ++ ++ /* Handle confined exec. ++ * Can be at this point for the following reasons: ++ * 1. unconfined switching to confined ++ * 2. confined switching to different confinement ++ * 3. 
confined switching to unconfined ++ * ++ * Cases 2 and 3 are marked as requiring secure exec ++ * (unless policy specified "unsafe exec") ++ */ ++ if (!(exec_mode & (AA_EXEC_UNSAFE << shift))) { ++ unsigned long bprm_flags; ++ ++ bprm_flags = AA_SECURE_EXEC_NEEDED; ++ bprm->security = (void*) ++ ((unsigned long)bprm->security | bprm_flags); ++ } ++ ++ if (complain && new_profile && ++ new_profile == new_profile->ns->null_complain_profile) { ++ sa.request_mask = 0; ++ sa.name = NULL; ++ sa.info = "set profile"; ++ aa_audit_hint(new_profile, &sa); ++ } ++ ++cleanup: ++ aa_put_name_buffer(child); ++ aa_put_name_buffer(buffer); ++ if (IS_ERR(new_profile)) ++ return PTR_ERR(new_profile); ++ aa_put_profile(new_profile); ++ return 0; ++} ++ ++/** ++ * aa_release - release a task context ++ * @task: task being released ++ * ++ * This is called after a task has exited and the parent has reaped it. ++ */ ++void aa_release(struct task_struct *task) ++{ ++ struct aa_task_context *cxt; ++ struct aa_profile *profile; ++ /* ++ * While the task context is still on a profile's task context ++ * list, another process could replace the profile under us, ++ * leaving us with a locked profile that is no longer attached ++ * to this task. So after locking the profile, we check that ++ * the profile is still attached. The profile lock is ++ * sufficient to prevent the replacement race so we do not lock ++ * the task. ++ * ++ * Use lock subtyping to avoid lockdep reporting a false irq ++ * possible inversion between the task_lock and profile_lock ++ * ++ * We also avoid taking the task_lock here because lock_dep ++ * would report another false {softirq-on-W} potential irq_lock ++ * inversion. ++ * ++ * If the task does not have a profile attached we are safe; ++ * nothing can race with us at this point. 
++ */ ++ ++repeat: ++ profile = aa_get_profile(task); ++ if (profile) { ++ lock_profile_nested(profile, aa_lock_task_release); ++ cxt = aa_task_context(task); ++ if (unlikely(!cxt || cxt->profile != profile)) { ++ unlock_profile(profile); ++ aa_put_profile(profile); ++ goto repeat; ++ } ++ aa_change_task_context(task, NULL, NULL, 0, NULL); ++ unlock_profile(profile); ++ aa_put_profile(profile); ++ } ++} ++ ++static int do_change_profile(struct aa_profile *expected, ++ struct aa_namespace *ns, const char *name, ++ u64 cookie, int restore, int hat, ++ struct aa_audit *sa) ++{ ++ struct aa_profile *new_profile = NULL, *old_profile = NULL, ++ *previous_profile = NULL; ++ struct aa_task_context *new_cxt, *cxt; ++ int error = 0; ++ ++ sa->name = name; ++ ++ new_cxt = aa_alloc_task_context(GFP_KERNEL); ++ if (!new_cxt) ++ return -ENOMEM; ++ ++ new_profile = aa_find_profile(ns, name); ++ if (!new_profile && !restore) { ++ if (!PROFILE_COMPLAIN(expected)) { ++ aa_free_task_context(new_cxt); ++ return -ENOENT; ++ } ++ new_profile = aa_dup_profile(ns->null_complain_profile); ++ } else if (new_profile && hat && !PROFILE_IS_HAT(new_profile)) { ++ aa_free_task_context(new_cxt); ++ aa_put_profile(new_profile); ++ return error; ++ } ++ ++ cxt = lock_task_and_profiles(current, new_profile); ++ if (!cxt) { ++ error = -EPERM; ++ goto out; ++ } ++ old_profile = cxt->profile; ++ ++ if (cxt->profile != expected || (new_profile && new_profile->isstale)) { ++ error = -ESTALE; ++ goto out; ++ } ++ ++ if (cxt->previous_profile) { ++ if (cxt->cookie != cookie) { ++ error = -EACCES; ++ sa->info = "killing process"; ++ aa_audit_reject(cxt->profile, sa); ++ /* terminate process */ ++ (void)send_sig_info(SIGKILL, NULL, current); ++ goto out; ++ } ++ ++ if (!restore) ++ previous_profile = cxt->previous_profile; ++ } else ++ previous_profile = cxt->profile; ++ ++ if ((current->ptrace & PT_PTRACED) && aa_may_ptrace(cxt, new_profile)) { ++ error = -EACCES; ++ goto out; ++ } ++ ++ if (new_profile == 
ns->null_complain_profile) ++ aa_audit_hint(cxt->profile, sa); ++ ++ if (APPARMOR_AUDIT(cxt)) ++ aa_audit_message(cxt->profile, sa, AUDIT_APPARMOR_AUDIT); ++ ++ if (!restore && cookie) ++ aa_change_task_context(current, new_cxt, new_profile, cookie, ++ previous_profile); ++ else ++ /* either return to previous_profile, or a permanent change */ ++ aa_change_task_context(current, new_cxt, new_profile, 0, NULL); ++ ++out: ++ if (aa_task_context(current) != new_cxt) ++ aa_free_task_context(new_cxt); ++ task_unlock(current); ++ unlock_both_profiles(old_profile, new_profile); ++ aa_put_profile(new_profile); ++ return error; ++} ++ ++/** ++ * aa_change_profile - perform a one-way profile transition ++ * @ns_name: name of the profile namespace to change to ++ * @name: name of profile to change to ++ * Change to new profile @name. Unlike with hats, there is no way ++ * to change back. ++ * ++ * Returns %0 on success, error otherwise. ++ */ ++int aa_change_profile(const char *ns_name, const char *name) ++{ ++ struct aa_task_context *cxt; ++ struct aa_profile *profile = NULL; ++ struct aa_namespace *ns = NULL; ++ struct aa_audit sa; ++ unsigned int state; ++ int error = -EINVAL; ++ ++ if (!name) ++ return -EINVAL; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.gfp_mask = GFP_ATOMIC; ++ sa.operation = "change_profile"; ++ ++repeat: ++ task_lock(current); ++ cxt = aa_task_context(current); ++ if (cxt) ++ profile = aa_dup_profile(cxt->profile); ++ task_unlock(current); ++ ++ if (ns_name) ++ ns = aa_find_namespace(ns_name); ++ else if (profile) ++ ns = aa_get_namespace(profile->ns); ++ else ++ ns = aa_get_namespace(default_namespace); ++ ++ if (!ns) { ++ aa_put_profile(profile); ++ return -ENOENT; ++ } ++ ++ if (!profile || PROFILE_COMPLAIN(profile) || ++ (ns == profile->ns && ++ (aa_match(profile->file_rules, name, NULL) & AA_CHANGE_PROFILE))) ++ error = do_change_profile(profile, ns, name, 0, 0, 0, &sa); ++ else { ++ /* check for a rule with a namespace prepended */ ++ 
aa_match_state(profile->file_rules, DFA_START, ns->name, ++ &state); ++ state = aa_dfa_null_transition(profile->file_rules, state); ++ if ((aa_match_state(profile->file_rules, state, name, NULL) & ++ AA_CHANGE_PROFILE)) ++ error = do_change_profile(profile, ns, name, 0, 0, 0, ++ &sa); ++ else ++ /* no permission to transition to profile @name */ ++ error = -EACCES; ++ } ++ ++ aa_put_namespace(ns); ++ aa_put_profile(profile); ++ if (error == -ESTALE) ++ goto repeat; ++ ++ return error; ++} ++ ++/** ++ * aa_change_hat - change hat to/from subprofile ++ * @hat_name: hat to change to ++ * @cookie: magic value to validate the hat change ++ * ++ * Change to new @hat_name, and store the @hat_magic in the current task ++ * context. If the new @hat_name is %NULL and the @cookie matches that ++ * stored in the current task context and is not 0, return to the top level ++ * profile. ++ * Returns %0 on success, error otherwise. ++ */ ++int aa_change_hat(const char *hat_name, u64 cookie) ++{ ++ struct aa_task_context *cxt; ++ struct aa_profile *profile, *previous_profile; ++ struct aa_audit sa; ++ int error = 0; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.gfp_mask = GFP_ATOMIC; ++ sa.operation = "change_hat"; ++ ++repeat: ++ task_lock(current); ++ cxt = aa_task_context(current); ++ if (!cxt) { ++ task_unlock(current); ++ return -EPERM; ++ } ++ profile = aa_dup_profile(cxt->profile); ++ previous_profile = aa_dup_profile(cxt->previous_profile); ++ task_unlock(current); ++ ++ if (hat_name) { ++ char *name, *profile_name; ++ ++ if (previous_profile) ++ profile_name = previous_profile->name; ++ else ++ profile_name = profile->name; ++ ++ name = new_compound_name(profile_name, hat_name); ++ if (!name) { ++ error = -ENOMEM; ++ goto out; ++ } ++ error = do_change_profile(profile, profile->ns, name, cookie, ++ 0, 1, &sa); ++ aa_put_name_buffer(name); ++ } else if (previous_profile) ++ error = do_change_profile(profile, profile->ns, ++ previous_profile->name, cookie, 1, 0, ++ &sa); ++ /* 
else ignore restores when there is no saved profile */ ++ ++out: ++ aa_put_profile(previous_profile); ++ aa_put_profile(profile); ++ if (error == -ESTALE) ++ goto repeat; ++ ++ return error; ++} ++ ++/** ++ * __aa_replace_profile - replace a task's profile ++ * @task: task to switch the profile of ++ * @profile: profile to switch to ++ * ++ * Returns a handle to the previous profile upon success, or else an ++ * error code. ++ */ ++struct aa_profile *__aa_replace_profile(struct task_struct *task, ++ struct aa_profile *profile) ++{ ++ struct aa_task_context *cxt, *new_cxt = NULL; ++ struct aa_profile *old_profile = NULL; ++ ++ if (profile) { ++ new_cxt = aa_alloc_task_context(GFP_KERNEL); ++ if (!new_cxt) ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ cxt = lock_task_and_profiles(task, profile); ++ if (unlikely(profile && profile->isstale)) { ++ task_unlock(task); ++ unlock_both_profiles(profile, cxt ? cxt->profile : NULL); ++ aa_free_task_context(new_cxt); ++ return ERR_PTR(-ESTALE); ++ } ++ ++ if ((current->ptrace & PT_PTRACED) && aa_may_ptrace(cxt, profile)) { ++ task_unlock(task); ++ unlock_both_profiles(profile, cxt ? cxt->profile : NULL); ++ aa_free_task_context(new_cxt); ++ return ERR_PTR(-EPERM); ++ } ++ ++ if (cxt) ++ old_profile = aa_dup_profile(cxt->profile); ++ aa_change_task_context(task, new_cxt, profile, 0, NULL); ++ ++ task_unlock(task); ++ unlock_both_profiles(profile, old_profile); ++ return old_profile; ++} ++ ++/** ++ * lock_task_and_profiles - lock the task and confining profiles and @profile ++ * @task: task to lock ++ * @profile: extra profile to lock in addition to the current profile ++ * ++ * Handle the spinning on locking to make sure the task context and ++ * profile are consistent once all locks are aquired. ++ * ++ * return the aa_task_context currently confining the task. The task lock ++ * will be held whether or not the task is confined. 
++ */ ++struct aa_task_context * ++lock_task_and_profiles(struct task_struct *task, struct aa_profile *profile) ++{ ++ struct aa_task_context *cxt; ++ struct aa_profile *old_profile = NULL; ++ ++ rcu_read_lock(); ++repeat: ++ cxt = aa_task_context(task); ++ if (cxt) ++ old_profile = cxt->profile; ++ ++ lock_both_profiles(profile, old_profile); ++ task_lock(task); ++ ++ /* check for race with profile transition, replacement or removal */ ++ if (unlikely(cxt != aa_task_context(task))) { ++ task_unlock(task); ++ unlock_both_profiles(profile, old_profile); ++ old_profile = NULL; ++ goto repeat; ++ } ++ rcu_read_unlock(); ++ return cxt; ++} ++ ++static void free_aa_task_context_rcu_callback(struct rcu_head *head) ++{ ++ struct aa_task_context *cxt; ++ ++ cxt = container_of(head, struct aa_task_context, rcu); ++ aa_free_task_context(cxt); ++} ++ ++/** ++ * aa_change_task_context - switch a task to use a new context and profile ++ * @task: task that is having its task context changed ++ * @new_cxt: new task context to use after the switch ++ * @profile: new profile to use after the switch ++ * @cookie: magic value to switch to ++ * @previous_profile: profile the task can return to ++ */ ++void aa_change_task_context(struct task_struct *task, ++ struct aa_task_context *new_cxt, ++ struct aa_profile *profile, u64 cookie, ++ struct aa_profile *previous_profile) ++{ ++ struct aa_task_context *old_cxt = aa_task_context(task); ++ ++ if (old_cxt) { ++ list_del_init(&old_cxt->list); ++ call_rcu(&old_cxt->rcu, free_aa_task_context_rcu_callback); ++ } ++ if (new_cxt) { ++ /* set the caps_logged cache to the quiet_caps mask ++ * this has the effect of quieting caps that are not ++ * supposed to be logged ++ */ ++ new_cxt->caps_logged = profile->quiet_caps; ++ new_cxt->cookie = cookie; ++ new_cxt->task = task; ++ new_cxt->profile = aa_dup_profile(profile); ++ new_cxt->previous_profile = aa_dup_profile(previous_profile); ++ list_move(&new_cxt->list, &profile->task_contexts); ++ } ++ 
rcu_assign_pointer(task->security, new_cxt); ++} diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-misc.diff b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-misc.diff new file mode 100644 index 0000000000..abffd1c483 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-misc.diff @@ -0,0 +1,1441 @@ +From: John Johansen +Subject: AppArmor: all the rest + +All the things that didn't nicely fit in a category on their own: kbuild +code, declararions and inline functions, /sys/kernel/security/apparmor +filesystem for controlling apparmor from user space, profile list +functions, locking documentation, /proc/$pid/task/$tid/attr/current +access. + +Signed-off-by: John Johansen +Signed-off-by: Andreas Gruenbacher + +--- + security/apparmor/Kconfig | 42 ++++ + security/apparmor/Makefile | 13 + + security/apparmor/apparmor.h | 371 +++++++++++++++++++++++++++++++++++++++++ + security/apparmor/apparmorfs.c | 281 +++++++++++++++++++++++++++++++ + security/apparmor/inline.h | 250 +++++++++++++++++++++++++++ + security/apparmor/list.c | 174 +++++++++++++++++++ + security/apparmor/locking.txt | 68 +++++++ + security/apparmor/procattr.c | 195 +++++++++++++++++++++ + 8 files changed, 1394 insertions(+) + +--- /dev/null ++++ b/security/apparmor/Kconfig +@@ -0,0 +1,42 @@ ++config SECURITY_APPARMOR ++ bool "AppArmor support" ++ depends on SECURITY ++ select AUDIT ++ help ++ This enables the AppArmor security module. ++ Required userspace tools (if they are not included in your ++ distribution) and further information may be found at ++ ++ ++ If you are unsure how to answer this question, answer N. ++ ++config SECURITY_APPARMOR_BOOTPARAM_VALUE ++ int "AppArmor boot parameter default value" ++ depends on SECURITY_APPARMOR ++ range 0 1 ++ default 1 ++ help ++ This option sets the default value for the kernel parameter ++ 'apparmor', which allows AppArmor to be enabled or disabled ++ at boot. 
If this option is set to 0 (zero), the AppArmor ++ kernel parameter will default to 0, disabling AppArmor at ++ bootup. If this option is set to 1 (one), the AppArmor ++ kernel parameter will default to 1, enabling AppArmor at ++ bootup. ++ ++ If you are unsure how to answer this question, answer 1. ++ ++config SECURITY_APPARMOR_DISABLE ++ bool "AppArmor runtime disable" ++ depends on SECURITY_APPARMOR ++ default n ++ help ++ This option enables writing to a apparmorfs node 'disable', which ++ allows AppArmor to be disabled at runtime prior to the policy load. ++ AppArmor will then remain disabled until the next boot. ++ This option is similar to the apparmor.enabled=0 boot parameter, ++ but is to support runtime disabling of AppArmor, e.g. from ++ /sbin/init, for portability across platforms where boot ++ parameters are difficult to employ. ++ ++ If you are unsure how to answer this question, answer N. +--- /dev/null ++++ b/security/apparmor/Makefile +@@ -0,0 +1,13 @@ ++# Makefile for AppArmor Linux Security Module ++# ++obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o ++ ++apparmor-y := main.o list.o procattr.o lsm.o apparmorfs.o \ ++ module_interface.o match.o ++ ++quiet_cmd_make-caps = GEN $@ ++cmd_make-caps = sed -n -e "/CAP_FS_MASK/d" -e "s/^\#define[ \\t]\\+CAP_\\([A-Z0-9_]\\+\\)[ \\t]\\+\\([0-9]\\+\\)\$$/[\\2] = \"\\1\",/p" $< | tr A-Z a-z > $@ ++ ++$(obj)/main.o : $(obj)/capability_names.h ++$(obj)/capability_names.h : $(srctree)/include/linux/capability.h ++ $(call cmd,make-caps) +--- /dev/null ++++ b/security/apparmor/apparmor.h +@@ -0,0 +1,371 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ * ++ * AppArmor internal prototypes ++ */ ++ ++#ifndef __APPARMOR_H ++#define __APPARMOR_H ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * We use MAY_READ, MAY_WRITE, MAY_EXEC, MAY_APPEND and the following flags ++ * for profile permissions ++ */ ++#define AA_MAY_LINK 0x0010 ++#define AA_MAY_LOCK 0x0020 ++#define AA_EXEC_MMAP 0x0040 ++#define AA_MAY_MOUNT 0x0080 /* no direct audit mapping */ ++#define AA_EXEC_UNSAFE 0x0100 ++#define AA_EXEC_INHERIT 0x0200 ++#define AA_EXEC_MOD_0 0x0400 ++#define AA_EXEC_MOD_1 0x0800 ++#define AA_EXEC_MOD_2 0x1000 ++#define AA_EXEC_MOD_3 0x2000 ++ ++#define AA_BASE_PERMS (MAY_READ | MAY_WRITE | MAY_EXEC | \ ++ MAY_APPEND | AA_MAY_LINK | \ ++ AA_MAY_LOCK | AA_EXEC_MMAP | \ ++ AA_MAY_MOUNT | AA_EXEC_UNSAFE | \ ++ AA_EXEC_INHERIT | AA_EXEC_MOD_0 | \ ++ AA_EXEC_MOD_1 | AA_EXEC_MOD_2 | \ ++ AA_EXEC_MOD_3) ++ ++#define AA_EXEC_MODIFIERS (AA_EXEC_MOD_0 | AA_EXEC_MOD_1 | \ ++ AA_EXEC_MOD_2 | AA_EXEC_MOD_3) ++ ++#define AA_EXEC_TYPE (AA_EXEC_UNSAFE | AA_EXEC_INHERIT | \ ++ AA_EXEC_MODIFIERS) ++ ++#define AA_EXEC_UNCONFINED AA_EXEC_MOD_0 ++#define AA_EXEC_PROFILE AA_EXEC_MOD_1 ++#define AA_EXEC_CHILD (AA_EXEC_MOD_0 | AA_EXEC_MOD_1) ++/* remaining exec modes are index into profile name table */ ++#define AA_EXEC_INDEX(mode) ((mode & AA_EXEC_MODIFIERS) >> 10) ++ ++#define AA_USER_SHIFT 0 ++#define AA_OTHER_SHIFT 14 ++ ++#define AA_USER_PERMS (AA_BASE_PERMS << AA_USER_SHIFT) ++#define AA_OTHER_PERMS (AA_BASE_PERMS << AA_OTHER_SHIFT) ++ ++#define AA_FILE_PERMS (AA_USER_PERMS | AA_OTHER_PERMS) ++ ++#define AA_LINK_BITS ((AA_MAY_LINK << AA_USER_SHIFT) | \ ++ (AA_MAY_LINK << AA_OTHER_SHIFT)) ++ ++#define AA_USER_EXEC (MAY_EXEC << AA_USER_SHIFT) ++#define AA_OTHER_EXEC (MAY_EXEC << AA_OTHER_SHIFT) ++ ++#define AA_USER_EXEC_TYPE (AA_EXEC_TYPE << AA_USER_SHIFT) ++#define AA_OTHER_EXEC_TYPE (AA_EXEC_TYPE << AA_OTHER_SHIFT) ++ ++#define AA_EXEC_BITS (AA_USER_EXEC | AA_OTHER_EXEC) ++ ++#define ALL_AA_EXEC_UNSAFE ((AA_EXEC_UNSAFE << 
AA_USER_SHIFT) | \ ++ (AA_EXEC_UNSAFE << AA_OTHER_SHIFT)) ++ ++#define ALL_AA_EXEC_TYPE (AA_USER_EXEC_TYPE | AA_OTHER_EXEC_TYPE) ++ ++/* overloaded permissions for link pairs */ ++#define AA_LINK_SUBSET_TEST 0x0020 ++ ++#define AA_USER_PTRACE 0x10000000 ++#define AA_OTHER_PTRACE 0x20000000 ++#define AA_PTRACE_PERMS (AA_USER_PTRACE | AA_OTHER_PTRACE) ++ ++/* shared permissions that are not duplicated in user::other */ ++#define AA_CHANGE_HAT 0x40000000 ++#define AA_CHANGE_PROFILE 0x80000000 ++ ++#define AA_SHARED_PERMS (AA_CHANGE_HAT | AA_CHANGE_PROFILE) ++ ++#define AA_VALID_PERM_MASK (AA_FILE_PERMS | AA_PTRACE_PERMS | \ ++ AA_SHARED_PERMS) ++ ++/* audit bits for the second accept field */ ++#define AUDIT_FILE_MASK 0x1fc07f ++#define AUDIT_QUIET_MASK(mask) ((mask >> 7) & AUDIT_FILE_MASK) ++#define AA_VALID_PERM2_MASK 0x0fffffff ++ ++#define AA_SECURE_EXEC_NEEDED 1 ++ ++/* Control parameters (0 or 1), settable thru module/boot flags or ++ * via /sys/kernel/security/apparmor/control */ ++extern int apparmor_complain; ++extern int apparmor_debug; ++extern int apparmor_audit; ++extern int apparmor_logsyscall; ++extern unsigned int apparmor_path_max; ++ ++#define PROFILE_COMPLAIN(_profile) \ ++ (apparmor_complain == 1 || ((_profile) && (_profile)->flags.complain)) ++ ++#define APPARMOR_COMPLAIN(_cxt) \ ++ (apparmor_complain == 1 || \ ++ ((_cxt) && (_cxt)->profile && (_cxt)->profile->flags.complain)) ++ ++#define PROFILE_AUDIT(_profile) \ ++ (apparmor_audit == 1 || ((_profile) && (_profile)->flags.audit)) ++ ++#define APPARMOR_AUDIT(_cxt) \ ++ (apparmor_audit == 1 || \ ++ ((_cxt) && (_cxt)->profile && (_cxt)->profile->flags.audit)) ++ ++#define PROFILE_IS_HAT(_profile) \ ++ ((_profile) && (_profile)->flags.hat) ++ ++/* ++ * DEBUG remains global (no per profile flag) since it is mostly used in sysctl ++ * which is not related to profile accesses. ++ */ ++ ++#define AA_DEBUG(fmt, args...) 
\ ++ do { \ ++ if (apparmor_debug) \ ++ printk(KERN_DEBUG "AppArmor: " fmt, ##args); \ ++ } while (0) ++ ++#define AA_ERROR(fmt, args...) printk(KERN_ERR "AppArmor: " fmt, ##args) ++ ++struct aa_profile; ++ ++/* struct aa_namespace - namespace for a set of profiles ++ * @name: the name of the namespace ++ * @list: list the namespace is on ++ * @profiles: list of profile in the namespace ++ * @profile_count: the number of profiles in the namespace ++ * @null_complain_profile: special profile used for learning in this namespace ++ * @count: reference count on the namespace ++ * @lock: lock for adding/removing profile to the namespace ++ */ ++struct aa_namespace { ++ char *name; ++ struct list_head list; ++ struct list_head profiles; ++ int profile_count; ++ struct aa_profile *null_complain_profile; ++ ++ struct kref count; ++ rwlock_t lock; ++}; ++ ++/* struct aa_profile - basic confinement data ++ * @name: the profiles name ++ * @list: list this profile is on ++ * @ns: namespace the profile is in ++ * @file_rules: dfa containing the profiles file rules ++ * @flags: flags controlling profile behavior ++ * @isstale: flag indicating if profile is stale ++ * @set_caps: capabilities that are being set ++ * @capabilities: capabilities mask ++ * @audit_caps: caps that are to be audited ++ * @quiet_caps: caps that should not be audited ++ * @capabilities: capabilities granted by the process ++ * @count: reference count of the profile ++ * @task_contexts: list of tasks confined by profile ++ * @lock: lock for the task_contexts list ++ * @network_families: basic network permissions ++ * @audit_network: which network permissions to force audit ++ * @quiet_network: which network permissions to quiet rejects ++ * ++ * The AppArmor profile contains the basic confinement data. Each profile ++ * has a name, and all nonstale profile are in a profile namespace. ++ * ++ * The task_contexts list and the isstale flag are protected by the ++ * profile lock. 
++ * ++ * If a task context is moved between two profiles, we first need to grab ++ * both profile locks. lock_both_profiles() does that in a deadlock-safe ++ * way. ++ */ ++struct aa_profile { ++ char *name; ++ struct list_head list; ++ struct aa_namespace *ns; ++ ++ int exec_table_size; ++ char **exec_table; ++ struct aa_dfa *file_rules; ++ struct { ++ int hat; ++ int complain; ++ int audit; ++ } flags; ++ int isstale; ++ ++ kernel_cap_t set_caps; ++ kernel_cap_t capabilities; ++ kernel_cap_t audit_caps; ++ kernel_cap_t quiet_caps; ++ ++ struct kref count; ++ struct list_head task_contexts; ++ spinlock_t lock; ++ unsigned long int_flags; ++}; ++ ++extern struct list_head profile_ns_list; ++extern rwlock_t profile_ns_list_lock; ++extern struct mutex aa_interface_lock; ++ ++/** ++ * struct aa_task_context - primary label for confined tasks ++ * @profile: the current profile ++ * @previous_profile: profile the task may return to ++ * @cookie: magic value the task must know for returning to @previous_profile ++ * @list: list this aa_task_context is on ++ * @task: task that the aa_task_context confines ++ * @rcu: rcu head used when freeing the aa_task_context ++ * @caps_logged: caps that have previously generated log entries ++ * ++ * Contains the task's current profile (which could change due to ++ * change_hat). Plus the hat_magic needed during change_hat. ++ */ ++struct aa_task_context { ++ struct aa_profile *profile; ++ struct aa_profile *previous_profile; ++ u64 cookie; ++ struct list_head list; ++ struct task_struct *task; ++ struct rcu_head rcu; ++ kernel_cap_t caps_logged; ++}; ++ ++extern struct aa_namespace *default_namespace; ++ ++/* aa_audit - AppArmor auditing structure ++ * Structure is populated by access control code and passed to aa_audit which ++ * provides for a single point of logging. 
++ */ ++ ++struct aa_audit { ++ const char *operation; ++ gfp_t gfp_mask; ++ const char *info; ++ const char *name; ++ const char *name2; ++ const char *name3; ++ int request_mask, denied_mask, audit_mask; ++ struct iattr *iattr; ++ pid_t task, parent; ++ int error_code; ++}; ++ ++/* Flags for the permission check functions */ ++#define AA_CHECK_FD 1 /* coming from a file descriptor */ ++#define AA_CHECK_DIR 2 /* file type is directory */ ++ ++/* lock subtypes so lockdep does not raise false dependencies */ ++enum aa_lock_class { ++ aa_lock_normal, ++ aa_lock_nested, ++ aa_lock_task_release ++}; ++ ++/* main.c */ ++extern int alloc_default_namespace(void); ++extern void free_default_namespace(void); ++extern int aa_audit_message(struct aa_profile *profile, struct aa_audit *sa, ++ int type); ++void aa_audit_hint(struct aa_profile *profile, struct aa_audit *sa); ++void aa_audit_status(struct aa_profile *profile, struct aa_audit *sa); ++int aa_audit_reject(struct aa_profile *profile, struct aa_audit *sa); ++extern int aa_audit_syscallreject(struct aa_profile *profile, gfp_t gfp, ++ const char *); ++extern int aa_audit(struct aa_profile *profile, struct aa_audit *); ++ ++extern int aa_attr(struct aa_profile *profile, struct dentry *dentry, ++ struct vfsmount *mnt, struct iattr *iattr); ++extern int aa_perm_xattr(struct aa_profile *profile, const char *operation, ++ struct dentry *dentry, struct vfsmount *mnt, ++ int mask, int check); ++extern int aa_capability(struct aa_task_context *cxt, int cap); ++extern int aa_perm(struct aa_profile *profile, const char *operation, ++ struct dentry *dentry, struct vfsmount *mnt, int mask, ++ int check); ++extern int aa_perm_dir(struct aa_profile *profile, const char *operation, ++ struct dentry *dentry, struct vfsmount *mnt, ++ int mask); ++extern int aa_perm_path(struct aa_profile *, const char *operation, ++ const char *name, int mask, uid_t uid); ++extern int aa_link(struct aa_profile *profile, ++ struct dentry *link, struct 
vfsmount *link_mnt, ++ struct dentry *target, struct vfsmount *target_mnt); ++extern int aa_clone(struct task_struct *task); ++extern int aa_register(struct linux_binprm *bprm); ++extern void aa_release(struct task_struct *task); ++extern int aa_change_hat(const char *id, u64 hat_magic); ++extern int aa_change_profile(const char *ns_name, const char *name); ++extern struct aa_profile *__aa_replace_profile(struct task_struct *task, ++ struct aa_profile *profile); ++extern struct aa_task_context *lock_task_and_profiles(struct task_struct *task, ++ struct aa_profile *profile); ++extern void unlock_task_and_profiles(struct task_struct *task, ++ struct aa_task_context *cxt, ++ struct aa_profile *profile); ++extern void aa_change_task_context(struct task_struct *task, ++ struct aa_task_context *new_cxt, ++ struct aa_profile *profile, u64 cookie, ++ struct aa_profile *previous_profile); ++extern int aa_may_ptrace(struct aa_task_context *cxt, ++ struct aa_profile *tracee); ++ ++/* lsm.c */ ++extern int apparmor_initialized; ++extern void info_message(const char *str); ++extern void apparmor_disable(void); ++ ++/* list.c */ ++extern struct aa_namespace *__aa_find_namespace(const char *name, ++ struct list_head *list); ++extern struct aa_profile *__aa_find_profile(const char *name, ++ struct list_head *list); ++extern void aa_profile_ns_list_release(void); ++ ++/* module_interface.c */ ++extern ssize_t aa_add_profile(void *, size_t); ++extern ssize_t aa_replace_profile(void *, size_t); ++extern ssize_t aa_remove_profile(char *, size_t); ++extern struct aa_namespace *alloc_aa_namespace(char *name); ++extern void free_aa_namespace(struct aa_namespace *ns); ++extern void free_aa_namespace_kref(struct kref *kref); ++extern struct aa_profile *alloc_aa_profile(void); ++extern void free_aa_profile(struct aa_profile *profile); ++extern void free_aa_profile_kref(struct kref *kref); ++extern void aa_unconfine_tasks(struct aa_profile *profile); ++ ++/* procattr.c */ ++extern int 
aa_getprocattr(struct aa_profile *profile, char **string, ++ unsigned *len); ++extern int aa_setprocattr_changehat(char *args); ++extern int aa_setprocattr_changeprofile(char *args); ++extern int aa_setprocattr_setprofile(struct task_struct *task, char *args); ++ ++/* apparmorfs.c */ ++extern int create_apparmorfs(void); ++extern void destroy_apparmorfs(void); ++ ++/* match.c */ ++extern struct aa_dfa *aa_match_alloc(void); ++extern void aa_match_free(struct aa_dfa *dfa); ++extern int unpack_dfa(struct aa_dfa *dfa, void *blob, size_t size); ++extern int verify_dfa(struct aa_dfa *dfa); ++extern unsigned int aa_dfa_match(struct aa_dfa *dfa, const char *str, int *); ++extern unsigned int aa_dfa_next_state(struct aa_dfa *dfa, unsigned int start, ++ const char *str); ++extern unsigned int aa_match_state(struct aa_dfa *dfa, unsigned int start, ++ const char *str, unsigned int *final); ++extern unsigned int aa_dfa_null_transition(struct aa_dfa *dfa, ++ unsigned int start); ++ ++#endif /* __APPARMOR_H */ +--- /dev/null ++++ b/security/apparmor/apparmorfs.c +@@ -0,0 +1,281 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. ++ * ++ * AppArmor filesystem (part of securityfs) ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "apparmor.h" ++#include "inline.h" ++ ++static char *aa_simple_write_to_buffer(const char __user *userbuf, ++ size_t alloc_size, size_t copy_size, ++ loff_t *pos, const char *operation) ++{ ++ struct aa_profile *profile; ++ char *data; ++ ++ if (*pos != 0) { ++ /* only writes from pos 0, that is complete writes */ ++ data = ERR_PTR(-ESPIPE); ++ goto out; ++ } ++ ++ /* ++ * Don't allow confined processes to load/replace/remove profiles. 
++ * No sane person would add rules allowing this to a profile ++ * but we enforce the restriction anyways. ++ */ ++ profile = aa_get_profile(current); ++ if (profile) { ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = operation; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.error_code = -EACCES; ++ data = ERR_PTR(aa_audit_reject(profile, &sa)); ++ aa_put_profile(profile); ++ goto out; ++ } ++ ++ data = vmalloc(alloc_size); ++ if (data == NULL) { ++ data = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ if (copy_from_user(data, userbuf, copy_size)) { ++ vfree(data); ++ data = ERR_PTR(-EFAULT); ++ goto out; ++ } ++ ++out: ++ return data; ++} ++ ++/* apparmor/profiles */ ++extern struct seq_operations apparmorfs_profiles_op; ++ ++static int aa_profiles_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &apparmorfs_profiles_op); ++} ++ ++ ++static int aa_profiles_release(struct inode *inode, struct file *file) ++{ ++ return seq_release(inode, file); ++} ++ ++static struct file_operations apparmorfs_profiles_fops = { ++ .open = aa_profiles_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = aa_profiles_release, ++}; ++ ++/* apparmor/matching */ ++static ssize_t aa_matching_read(struct file *file, char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ const char *matching = "pattern=aadfa audit perms=rwxamlk/ user::other"; ++ ++ return simple_read_from_buffer(buf, size, ppos, matching, ++ strlen(matching)); ++} ++ ++static struct file_operations apparmorfs_matching_fops = { ++ .read = aa_matching_read, ++}; ++ ++/* apparmor/features */ ++static ssize_t aa_features_read(struct file *file, char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ const char *features = "file=3.0 capability=2.0 network=1.0 " ++ "change_hat=1.5 change_profile=1.0 " ++ "aanamespaces=1.0"; ++ ++ return simple_read_from_buffer(buf, size, ppos, features, ++ strlen(features)); ++} ++ ++static struct file_operations apparmorfs_features_fops = { ++ .read = 
aa_features_read, ++}; ++ ++/* apparmor/.load */ ++static ssize_t aa_profile_load(struct file *f, const char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ char *data; ++ ssize_t error; ++ ++ data = aa_simple_write_to_buffer(buf, size, size, pos, "profile_load"); ++ ++ error = PTR_ERR(data); ++ if (!IS_ERR(data)) { ++ error = aa_add_profile(data, size); ++ vfree(data); ++ } ++ ++ return error; ++} ++ ++ ++static struct file_operations apparmorfs_profile_load = { ++ .write = aa_profile_load ++}; ++ ++/* apparmor/.replace */ ++static ssize_t aa_profile_replace(struct file *f, const char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ char *data; ++ ssize_t error; ++ ++ data = aa_simple_write_to_buffer(buf, size, size, pos, ++ "profile_replace"); ++ ++ error = PTR_ERR(data); ++ if (!IS_ERR(data)) { ++ error = aa_replace_profile(data, size); ++ vfree(data); ++ } ++ ++ return error; ++} ++ ++ ++static struct file_operations apparmorfs_profile_replace = { ++ .write = aa_profile_replace ++}; ++ ++/* apparmor/.remove */ ++static ssize_t aa_profile_remove(struct file *f, const char __user *buf, ++ size_t size, loff_t *pos) ++{ ++ char *data; ++ ssize_t error; ++ ++ /* ++ * aa_remove_profile needs a null terminated string so 1 extra ++ * byte is allocated and the copied data is null terminated. 
++ */ ++ data = aa_simple_write_to_buffer(buf, size + 1, size, pos, ++ "profile_remove"); ++ ++ error = PTR_ERR(data); ++ if (!IS_ERR(data)) { ++ data[size] = 0; ++ error = aa_remove_profile(data, size); ++ vfree(data); ++ } ++ ++ return error; ++} ++ ++static struct file_operations apparmorfs_profile_remove = { ++ .write = aa_profile_remove ++}; ++ ++static struct dentry *apparmor_dentry; ++ ++static void aafs_remove(const char *name) ++{ ++ struct dentry *dentry; ++ ++ dentry = lookup_one_len(name, apparmor_dentry, strlen(name)); ++ if (!IS_ERR(dentry)) { ++ securityfs_remove(dentry); ++ dput(dentry); ++ } ++} ++ ++static int aafs_create(const char *name, int mask, struct file_operations *fops) ++{ ++ struct dentry *dentry; ++ ++ dentry = securityfs_create_file(name, S_IFREG | mask, apparmor_dentry, ++ NULL, fops); ++ ++ return IS_ERR(dentry) ? PTR_ERR(dentry) : 0; ++} ++ ++void destroy_apparmorfs(void) ++{ ++ if (apparmor_dentry) { ++ aafs_remove(".remove"); ++ aafs_remove(".replace"); ++ aafs_remove(".load"); ++ aafs_remove("matching"); ++ aafs_remove("features"); ++ aafs_remove("profiles"); ++ securityfs_remove(apparmor_dentry); ++ apparmor_dentry = NULL; ++ } ++} ++ ++int create_apparmorfs(void) ++{ ++ int error; ++ ++ if (!apparmor_initialized) ++ return 0; ++ ++ if (apparmor_dentry) { ++ AA_ERROR("%s: AppArmor securityfs already exists\n", ++ __FUNCTION__); ++ return -EEXIST; ++ } ++ ++ apparmor_dentry = securityfs_create_dir("apparmor", NULL); ++ if (IS_ERR(apparmor_dentry)) { ++ error = PTR_ERR(apparmor_dentry); ++ apparmor_dentry = NULL; ++ goto error; ++ } ++ error = aafs_create("profiles", 0440, &apparmorfs_profiles_fops); ++ if (error) ++ goto error; ++ error = aafs_create("matching", 0444, &apparmorfs_matching_fops); ++ if (error) ++ goto error; ++ error = aafs_create("features", 0444, &apparmorfs_features_fops); ++ if (error) ++ goto error; ++ error = aafs_create(".load", 0640, &apparmorfs_profile_load); ++ if (error) ++ goto error; ++ error = 
aafs_create(".replace", 0640, &apparmorfs_profile_replace); ++ if (error) ++ goto error; ++ error = aafs_create(".remove", 0640, &apparmorfs_profile_remove); ++ if (error) ++ goto error; ++ ++ /* Report that AppArmor fs is enabled */ ++ info_message("AppArmor Filesystem Enabled"); ++ return 0; ++ ++error: ++ destroy_apparmorfs(); ++ AA_ERROR("Error creating AppArmor securityfs\n"); ++ apparmor_disable(); ++ return error; ++} ++ ++fs_initcall(create_apparmorfs); ++ +--- /dev/null ++++ b/security/apparmor/inline.h +@@ -0,0 +1,250 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. ++ */ ++ ++#ifndef __INLINE_H ++#define __INLINE_H ++ ++#include ++ ++#include "match.h" ++ ++static inline int mediated_filesystem(struct inode *inode) ++{ ++ return !(inode->i_sb->s_flags & MS_NOUSER); ++} ++ ++static inline struct aa_task_context *aa_task_context(struct task_struct *task) ++{ ++ return (struct aa_task_context *) rcu_dereference(task->security); ++} ++ ++static inline struct aa_namespace *aa_get_namespace(struct aa_namespace *ns) ++{ ++ if (ns) ++ kref_get(&(ns->count)); ++ ++ return ns; ++} ++ ++static inline void aa_put_namespace(struct aa_namespace *ns) ++{ ++ if (ns) ++ kref_put(&ns->count, free_aa_namespace_kref); ++} ++ ++ ++static inline struct aa_namespace *aa_find_namespace(const char *name) ++{ ++ struct aa_namespace *ns = NULL; ++ ++ read_lock(&profile_ns_list_lock); ++ ns = aa_get_namespace(__aa_find_namespace(name, &profile_ns_list)); ++ read_unlock(&profile_ns_list_lock); ++ ++ return ns; ++} ++ ++/** ++ * aa_dup_profile - increment refcount on profile @p ++ * @p: profile ++ */ ++static inline struct aa_profile *aa_dup_profile(struct aa_profile *p) ++{ ++ if (p) ++ kref_get(&(p->count)); ++ ++ return p; ++} ++ ++/** ++ * aa_put_profile - 
decrement refcount on profile @p ++ * @p: profile ++ */ ++static inline void aa_put_profile(struct aa_profile *p) ++{ ++ if (p) ++ kref_put(&p->count, free_aa_profile_kref); ++} ++ ++static inline struct aa_profile *aa_get_profile(struct task_struct *task) ++{ ++ struct aa_task_context *cxt; ++ struct aa_profile *profile = NULL; ++ ++ rcu_read_lock(); ++ cxt = aa_task_context(task); ++ if (cxt) { ++ profile = cxt->profile; ++ aa_dup_profile(profile); ++ } ++ rcu_read_unlock(); ++ ++ return profile; ++} ++ ++static inline struct aa_profile *aa_find_profile(struct aa_namespace *ns, ++ const char *name) ++{ ++ struct aa_profile *profile = NULL; ++ ++ read_lock(&ns->lock); ++ profile = aa_dup_profile(__aa_find_profile(name, &ns->profiles)); ++ read_unlock(&ns->lock); ++ ++ return profile; ++} ++ ++static inline struct aa_task_context *aa_alloc_task_context(gfp_t flags) ++{ ++ struct aa_task_context *cxt; ++ ++ cxt = kzalloc(sizeof(*cxt), flags); ++ if (cxt) { ++ INIT_LIST_HEAD(&cxt->list); ++ INIT_RCU_HEAD(&cxt->rcu); ++ } ++ ++ return cxt; ++} ++ ++static inline void aa_free_task_context(struct aa_task_context *cxt) ++{ ++ if (cxt) { ++ aa_put_profile(cxt->profile); ++ aa_put_profile(cxt->previous_profile); ++ kfree(cxt); ++ } ++} ++ ++/** ++ * lock_profile - lock a profile ++ * @profile: the profile to lock ++ * ++ * While the profile is locked, local interrupts are disabled. This also ++ * gives us RCU reader safety. ++ */ ++static inline void lock_profile_nested(struct aa_profile *profile, ++ enum aa_lock_class lock_class) ++{ ++ /* ++ * Lock the profile. ++ * ++ * Need to disable interrupts here because this lock is used in ++ * the task_free_security hook, which may run in RCU context. 
++ */ ++ if (profile) ++ spin_lock_irqsave_nested(&profile->lock, profile->int_flags, ++ lock_class); ++} ++ ++static inline void lock_profile(struct aa_profile *profile) ++{ ++ lock_profile_nested(profile, aa_lock_normal); ++} ++ ++/** ++ * unlock_profile - unlock a profile ++ * @profile: the profile to unlock ++ */ ++static inline void unlock_profile(struct aa_profile *profile) ++{ ++ /* Unlock the profile. */ ++ if (profile) ++ spin_unlock_irqrestore(&profile->lock, profile->int_flags); ++} ++ ++/** ++ * lock_both_profiles - lock two profiles in a deadlock-free way ++ * @profile1: profile to lock (may be NULL) ++ * @profile2: profile to lock (may be NULL) ++ * ++ * The order in which profiles are passed into lock_both_profiles() / ++ * unlock_both_profiles() does not matter. ++ * While the profile is locked, local interrupts are disabled. This also ++ * gives us RCU reader safety. ++ */ ++static inline void lock_both_profiles(struct aa_profile *profile1, ++ struct aa_profile *profile2) ++{ ++ /* ++ * Lock the two profiles. ++ * ++ * We need to disable interrupts because the profile locks are ++ * used in the task_free_security hook, which may run in RCU ++ * context. ++ * ++ * Do not nest spin_lock_irqsave()/spin_unlock_irqresore(): ++ * interrupts only need to be turned off once. ++ */ ++ if (!profile1 || profile1 == profile2) { ++ if (profile2) ++ spin_lock_irqsave_nested(&profile2->lock, ++ profile2->int_flags, ++ aa_lock_normal); ++ } else if (profile1 > profile2) { ++ /* profile1 cannot be NULL here. */ ++ spin_lock_irqsave_nested(&profile1->lock, profile1->int_flags, ++ aa_lock_normal); ++ if (profile2) ++ spin_lock_nested(&profile2->lock, aa_lock_nested); ++ ++ } else { ++ /* profile2 cannot be NULL here. 
*/ ++ spin_lock_irqsave_nested(&profile2->lock, profile2->int_flags, ++ aa_lock_normal); ++ spin_lock_nested(&profile1->lock, aa_lock_nested); ++ } ++} ++ ++/** ++ * unlock_both_profiles - unlock two profiles in a deadlock-free way ++ * @profile1: profile to unlock (may be NULL) ++ * @profile2: profile to unlock (may be NULL) ++ * ++ * The order in which profiles are passed into lock_both_profiles() / ++ * unlock_both_profiles() does not matter. ++ * While the profile is locked, local interrupts are disabled. This also ++ * gives us RCU reader safety. ++ */ ++static inline void unlock_both_profiles(struct aa_profile *profile1, ++ struct aa_profile *profile2) ++{ ++ /* Unlock the two profiles. */ ++ if (!profile1 || profile1 == profile2) { ++ if (profile2) ++ spin_unlock_irqrestore(&profile2->lock, ++ profile2->int_flags); ++ } else if (profile1 > profile2) { ++ /* profile1 cannot be NULL here. */ ++ if (profile2) ++ spin_unlock(&profile2->lock); ++ spin_unlock_irqrestore(&profile1->lock, profile1->int_flags); ++ } else { ++ /* profile2 cannot be NULL here. */ ++ spin_unlock(&profile1->lock); ++ spin_unlock_irqrestore(&profile2->lock, profile2->int_flags); ++ } ++} ++ ++static inline unsigned int aa_match(struct aa_dfa *dfa, const char *pathname, ++ int *audit_mask) ++{ ++ if (dfa) ++ return aa_dfa_match(dfa, pathname, audit_mask); ++ if (audit_mask) ++ *audit_mask = 0; ++ return 0; ++} ++ ++static inline int dfa_audit_mask(struct aa_dfa *dfa, unsigned int state) ++{ ++ return ACCEPT_TABLE2(dfa)[state]; ++} ++ ++#endif /* __INLINE_H__ */ +--- /dev/null ++++ b/security/apparmor/list.c +@@ -0,0 +1,174 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ * ++ * AppArmor Profile List Management ++ */ ++ ++#include ++#include "apparmor.h" ++#include "inline.h" ++ ++/* list of profile namespaces and lock */ ++LIST_HEAD(profile_ns_list); ++rwlock_t profile_ns_list_lock = RW_LOCK_UNLOCKED; ++ ++/** ++ * __aa_find_namespace - look up a profile namespace on the namespace list ++ * @name: name of namespace to find ++ * @head: list to search ++ * ++ * Returns a pointer to the namespace on the list, or NULL if no namespace ++ * called @name exists. The caller must hold the profile_ns_list_lock. ++ */ ++struct aa_namespace *__aa_find_namespace(const char *name, ++ struct list_head *head) ++{ ++ struct aa_namespace *ns; ++ ++ list_for_each_entry(ns, head, list) { ++ if (!strcmp(ns->name, name)) ++ return ns; ++ } ++ ++ return NULL; ++} ++ ++/** ++ * __aa_find_profile - look up a profile on the profile list ++ * @name: name of profile to find ++ * @head: list to search ++ * ++ * Returns a pointer to the profile on the list, or NULL if no profile ++ * called @name exists. The caller must hold the profile_list_lock. ++ */ ++struct aa_profile *__aa_find_profile(const char *name, struct list_head *head) ++{ ++ struct aa_profile *profile; ++ ++ list_for_each_entry(profile, head, list) { ++ if (!strcmp(profile->name, name)) ++ return profile; ++ } ++ ++ return NULL; ++} ++ ++static void aa_profile_list_release(struct list_head *head) ++{ ++ struct aa_profile *profile, *tmp; ++ list_for_each_entry_safe(profile, tmp, head, list) { ++ /* Remove the profile from each task context it is on. */ ++ lock_profile(profile); ++ profile->isstale = 1; ++ aa_unconfine_tasks(profile); ++ list_del_init(&profile->list); ++ unlock_profile(profile); ++ aa_put_profile(profile); ++ } ++} ++ ++/** ++ * aa_profilelist_release - Remove all profiles from profile_list ++ */ ++void aa_profile_ns_list_release(void) ++{ ++ struct aa_namespace *ns, *tmp; ++ ++ /* Remove and release all the profiles on namespace profile lists. 
*/ ++ write_lock(&profile_ns_list_lock); ++ list_for_each_entry_safe(ns, tmp, &profile_ns_list, list) { ++ write_lock(&ns->lock); ++ aa_profile_list_release(&ns->profiles); ++ list_del_init(&ns->list); ++ write_unlock(&ns->lock); ++ aa_put_namespace(ns); ++ } ++ write_unlock(&profile_ns_list_lock); ++} ++ ++ ++static struct aa_profile *next_profile(struct aa_profile *profile) ++{ ++ struct aa_profile *next = profile; ++ struct aa_namespace *ns; ++ ++ list_for_each_entry_continue(next, &profile->ns->profiles, list) ++ return next; ++ ++ ns = profile->ns; ++ read_unlock(&ns->lock); ++ list_for_each_entry_continue(ns, &profile_ns_list, list) { ++ read_lock(&ns->lock); ++ list_for_each_entry(profile, &ns->profiles, list) ++ return profile; ++ read_unlock(&ns->lock); ++ } ++ return NULL; ++} ++ ++static void *p_start(struct seq_file *f, loff_t *pos) ++{ ++ struct aa_namespace *ns; ++ loff_t l = *pos; ++ ++ read_lock(&profile_ns_list_lock); ++ if (!list_empty(&profile_ns_list)) { ++ struct aa_profile *profile = NULL; ++ ns = list_first_entry(&profile_ns_list, typeof(*ns), list); ++ read_lock(&ns->lock); ++ if (!list_empty(&ns->profiles)) ++ profile = list_first_entry(&ns->profiles, ++ typeof(*profile), list); ++ else ++ read_unlock(&ns->lock); ++ for ( ; profile && l > 0; l--) ++ profile = next_profile(profile); ++ return profile; ++ } ++ return NULL; ++} ++ ++static void *p_next(struct seq_file *f, void *p, loff_t *pos) ++{ ++ struct aa_profile *profile = (struct aa_profile *) p; ++ ++ (*pos)++; ++ profile = next_profile(profile); ++ ++ return profile; ++} ++ ++static void p_stop(struct seq_file *f, void *p) ++{ ++ struct aa_profile *profile = (struct aa_profile *) p; ++ ++ if (profile) ++ read_unlock(&profile->ns->lock); ++ read_unlock(&profile_ns_list_lock); ++} ++ ++static int seq_show_profile(struct seq_file *f, void *p) ++{ ++ struct aa_profile *profile = (struct aa_profile *)p; ++ ++ if (profile->ns == default_namespace) ++ seq_printf(f, "%s (%s)\n", 
profile->name, ++ PROFILE_COMPLAIN(profile) ? "complain" : "enforce"); ++ else ++ seq_printf(f, ":%s:%s (%s)\n", profile->ns->name, profile->name, ++ PROFILE_COMPLAIN(profile) ? "complain" : "enforce"); ++ return 0; ++} ++ ++/* Used in apparmorfs.c */ ++struct seq_operations apparmorfs_profiles_op = { ++ .start = p_start, ++ .next = p_next, ++ .stop = p_stop, ++ .show = seq_show_profile, ++}; +--- /dev/null ++++ b/security/apparmor/locking.txt +@@ -0,0 +1,68 @@ ++Locking in AppArmor ++=================== ++ ++Lock hierarchy: ++ ++ aa_interface_lock ++ profile_list_lock ++ aa_profile->lock ++ task_lock() ++ ++ ++Which lock protects what? ++ ++ /-----------------------+-------------------------------\ ++ | Variable | Lock | ++ >-----------------------+-------------------------------< ++ | profile_list | profile_list_lock | ++ +-----------------------+-------------------------------+ ++ | aa_profile | (reference count) | ++ +-----------------------+-------------------------------+ ++ | aa_profile-> | aa_profile->lock | ++ | isstale, | | ++ | task_contexts | | ++ +-----------------------+-------------------------------+ ++ | task_struct->security | read: RCU | ++ | | write: task_lock() | ++ +-----------------------+-------------------------------+ ++ | aa_profile->sub | handle on the profile (list | ++ | | is never modified) | ++ \-----------------------+-------------------------------/ ++ ++(Obviously, the list_heads embedded in data structures are always ++protected with the lock that also protects the list.) ++ ++When moving a task context from one profile to another, we grab both ++profile locks with lock_both_profiles(). This ensures that both locks ++are always taken in the same order, and so we won't deadlock. ++ ++Since task_struct->security is RCU protected the aa_task_struct it ++references is only guarenteed to exist for the rcu cycle. 
Where ++aa_task_context->profile is needed in blocking operations the ++profile's reference count is incremented and the profile reference ++is used. ++ ++Profiles on profile_list are never stale: when a profile becomes stale, ++it is removed from profile_list at the same time (under profile_list_lock ++and aa_profile->lock). ++ ++The aa_interface_lock is taken whenever user-space modifies the profile ++list, and can sleep. This ensures that profile loading/replacement/removal ++won't race with itself. We release the profile_list_lock as soon as ++possible to avoid stalling exec during profile loading/replacement/removal. ++ ++AppArmor uses lock subtyping to avoid false positives from lockdep. The ++profile lock is often taken nested, but it is guaranteed to be in a lock ++safe order and not the same lock when done, so it is safe. ++ ++A third lock type (aa_lock_task_release) is given to the profile lock ++when it is taken in soft irq context during task release (aa_release). ++This is to avoid a false positive between the task lock and the profile ++lock. In task context the profile lock wraps the task lock with irqs ++off, but the kernel takes the task lock with irqs enabled. This won't ++result in a deadlock because for a deadlock to occur the kernel must ++take dead task A's lock (irqs on), the rcu callback hook freeing ++dead task A must be run and AppArmor must be changing the profile on ++dead task A. The kernel should not be taking a dead task's task_lock ++at the same time the task is being freed by task rcu cleanup other wise ++the task would not be out of its quiescent period. +--- /dev/null ++++ b/security/apparmor/procattr.c +@@ -0,0 +1,195 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ * ++ * AppArmor /proc/pid/attr handling ++ */ ++ ++#include "apparmor.h" ++#include "inline.h" ++ ++int aa_getprocattr(struct aa_profile *profile, char **string, unsigned *len) ++{ ++ char *str; ++ ++ if (profile) { ++ const char *mode_str = PROFILE_COMPLAIN(profile) ? ++ " (complain)" : " (enforce)"; ++ int mode_len, name_len, ns_len = 0; ++ ++ mode_len = strlen(mode_str); ++ name_len = strlen(profile->name); ++ if (profile->ns != default_namespace) ++ ns_len = strlen(profile->ns->name) + 2; ++ *len = mode_len + ns_len + name_len + 1; ++ str = kmalloc(*len, GFP_ATOMIC); ++ if (!str) ++ return -ENOMEM; ++ ++ if (ns_len) { ++ *str++ = ':'; ++ memcpy(str, profile->ns->name, ns_len - 2); ++ str += ns_len - 2; ++ *str++ = ':'; ++ } ++ memcpy(str, profile->name, name_len); ++ str += name_len; ++ memcpy(str, mode_str, mode_len); ++ str += mode_len; ++ *str++ = '\n'; ++ str -= *len; ++ } else { ++ const char *unconfined_str = "unconfined\n"; ++ ++ *len = strlen(unconfined_str); ++ str = kmalloc(*len, GFP_ATOMIC); ++ if (!str) ++ return -ENOMEM; ++ ++ memcpy(str, unconfined_str, *len); ++ } ++ *string = str; ++ ++ return 0; ++} ++ ++static char *split_token_from_name(const char *op, char *args, u64 *cookie) ++{ ++ char *name; ++ ++ *cookie = simple_strtoull(args, &name, 16); ++ if ((name == args) || *name != '^') { ++ AA_ERROR("%s: Invalid input '%s'", op, args); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ name++; /* skip ^ */ ++ if (!*name) ++ name = NULL; ++ return name; ++} ++ ++int aa_setprocattr_changehat(char *args) ++{ ++ char *hat; ++ u64 cookie; ++ ++ hat = split_token_from_name("change_hat", args, &cookie); ++ if (IS_ERR(hat)) ++ return PTR_ERR(hat); ++ ++ if (!hat && !cookie) { ++ AA_ERROR("change_hat: Invalid input, NULL hat and NULL magic"); ++ return -EINVAL; ++ } ++ ++ AA_DEBUG("%s: Magic 0x%llx Hat '%s'\n", ++ __FUNCTION__, cookie, hat ? 
hat : NULL); ++ ++ return aa_change_hat(hat, cookie); ++} ++ ++int aa_setprocattr_changeprofile(char *args) ++{ ++ char *name = args, *ns_name = NULL; ++ ++ if (name[0] == ':') { ++ char *split = strchr(&name[1], ':'); ++ if (split) { ++ *split = 0; ++ ns_name = &name[1]; ++ name = split + 1; ++ } ++ } ++ ++ return aa_change_profile(ns_name, name); ++} ++ ++int aa_setprocattr_setprofile(struct task_struct *task, char *args) ++{ ++ struct aa_profile *old_profile, *new_profile; ++ struct aa_namespace *ns; ++ struct aa_audit sa; ++ char *name, *ns_name = NULL; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "profile_set"; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.task = task->pid; ++ ++ AA_DEBUG("%s: current %d\n", ++ __FUNCTION__, current->pid); ++ ++ name = args; ++ if (args[0] != '/') { ++ char *split = strchr(args, ':'); ++ if (split) { ++ *split = 0; ++ ns_name = args; ++ name = split + 1; ++ } ++ } ++ if (ns_name) ++ ns = aa_find_namespace(ns_name); ++ else ++ ns = aa_get_namespace(default_namespace); ++ if (!ns) { ++ sa.name = ns_name; ++ sa.info = "unknown namespace"; ++ aa_audit_reject(NULL, &sa); ++ aa_put_namespace(ns); ++ return -EINVAL; ++ } ++ ++repeat: ++ if (strcmp(name, "unconfined") == 0) ++ new_profile = NULL; ++ else { ++ new_profile = aa_find_profile(ns, name); ++ if (!new_profile) { ++ sa.name = ns_name; ++ sa.name2 = name; ++ sa.info = "unknown profile"; ++ aa_audit_reject(NULL, &sa); ++ aa_put_namespace(ns); ++ return -EINVAL; ++ } ++ } ++ ++ old_profile = __aa_replace_profile(task, new_profile); ++ if (IS_ERR(old_profile)) { ++ int error; ++ ++ aa_put_profile(new_profile); ++ error = PTR_ERR(old_profile); ++ if (error == -ESTALE) ++ goto repeat; ++ aa_put_namespace(ns); ++ return error; ++ } ++ ++ if (new_profile) { ++ sa.name = ns_name; ++ sa.name2 = name; ++ sa.name3 = old_profile ? 
old_profile->name : ++ "unconfined"; ++ aa_audit_status(NULL, &sa); ++ } else { ++ if (old_profile) { ++ sa.name = "unconfined"; ++ sa.name2 = old_profile->name; ++ aa_audit_status(NULL, &sa); ++ } else { ++ sa.info = "task is unconfined"; ++ aa_audit_status(NULL, &sa); ++ } ++ } ++ aa_put_namespace(ns); ++ aa_put_profile(old_profile); ++ aa_put_profile(new_profile); ++ return 0; ++} diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-module_interface.diff b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-module_interface.diff new file mode 100644 index 0000000000..f373428d69 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-module_interface.diff @@ -0,0 +1,1350 @@ +From: John Johansen +Subject: AppArmor: Profile loading and manipulation, pathname matching + +Pathname matching, transition table loading, profile loading and +manipulation. + +Signed-off-by: John Johansen +Signed-off-by: Andreas Gruenbacher + +--- + security/apparmor/match.c | 364 ++++++++++++++ + security/apparmor/match.h | 87 +++ + security/apparmor/module_interface.c | 875 +++++++++++++++++++++++++++++++++++ + 3 files changed, 1326 insertions(+) + +--- /dev/null ++++ b/security/apparmor/match.c +@@ -0,0 +1,364 @@ ++/* ++ * Copyright (C) 2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ * ++ * Regular expression transition table matching ++ */ ++ ++#include ++#include ++#include ++#include "apparmor.h" ++#include "match.h" ++#include "inline.h" ++ ++static struct table_header *unpack_table(void *blob, size_t bsize) ++{ ++ struct table_header *table = NULL; ++ struct table_header th; ++ size_t tsize; ++ ++ if (bsize < sizeof(struct table_header)) ++ goto out; ++ ++ th.td_id = be16_to_cpu(*(u16 *) (blob)); ++ th.td_flags = be16_to_cpu(*(u16 *) (blob + 2)); ++ th.td_lolen = be32_to_cpu(*(u32 *) (blob + 8)); ++ blob += sizeof(struct table_header); ++ ++ if (!(th.td_flags == YYTD_DATA16 || th.td_flags == YYTD_DATA32 || ++ th.td_flags == YYTD_DATA8)) ++ goto out; ++ ++ tsize = table_size(th.td_lolen, th.td_flags); ++ if (bsize < tsize) ++ goto out; ++ ++ table = kmalloc(tsize, GFP_KERNEL); ++ if (table) { ++ *table = th; ++ if (th.td_flags == YYTD_DATA8) ++ UNPACK_ARRAY(table->td_data, blob, th.td_lolen, ++ u8, byte_to_byte); ++ else if (th.td_flags == YYTD_DATA16) ++ UNPACK_ARRAY(table->td_data, blob, th.td_lolen, ++ u16, be16_to_cpu); ++ else ++ UNPACK_ARRAY(table->td_data, blob, th.td_lolen, ++ u32, be32_to_cpu); ++ } ++ ++out: ++ return table; ++} ++ ++int unpack_dfa(struct aa_dfa *dfa, void *blob, size_t size) ++{ ++ int hsize, i; ++ int error = -ENOMEM; ++ ++ /* get dfa table set header */ ++ if (size < sizeof(struct table_set_header)) ++ goto fail; ++ ++ if (ntohl(*(u32 *)blob) != YYTH_MAGIC) ++ goto fail; ++ ++ hsize = ntohl(*(u32 *)(blob + 4)); ++ if (size < hsize) ++ goto fail; ++ ++ blob += hsize; ++ size -= hsize; ++ ++ error = -EPROTO; ++ while (size > 0) { ++ struct table_header *table; ++ table = unpack_table(blob, size); ++ if (!table) ++ goto fail; ++ ++ switch(table->td_id) { ++ case YYTD_ID_ACCEPT: ++ case YYTD_ID_ACCEPT2: ++ case YYTD_ID_BASE: ++ dfa->tables[table->td_id - 1] = table; ++ if (table->td_flags != YYTD_DATA32) ++ goto fail; ++ break; ++ case YYTD_ID_DEF: ++ case YYTD_ID_NXT: ++ case YYTD_ID_CHK: ++ 
dfa->tables[table->td_id - 1] = table; ++ if (table->td_flags != YYTD_DATA16) ++ goto fail; ++ break; ++ case YYTD_ID_EC: ++ dfa->tables[table->td_id - 1] = table; ++ if (table->td_flags != YYTD_DATA8) ++ goto fail; ++ break; ++ default: ++ kfree(table); ++ goto fail; ++ } ++ ++ blob += table_size(table->td_lolen, table->td_flags); ++ size -= table_size(table->td_lolen, table->td_flags); ++ } ++ ++ return 0; ++ ++fail: ++ for (i = 0; i < ARRAY_SIZE(dfa->tables); i++) { ++ if (dfa->tables[i]) { ++ kfree(dfa->tables[i]); ++ dfa->tables[i] = NULL; ++ } ++ } ++ return error; ++} ++ ++/** ++ * verify_dfa - verify that all the transitions and states in the dfa tables ++ * are in bounds. ++ * @dfa: dfa to test ++ * ++ * assumes dfa has gone through the verification done by unpacking ++ */ ++int verify_dfa(struct aa_dfa *dfa) ++{ ++ size_t i, state_count, trans_count; ++ int error = -EPROTO; ++ ++ /* check that required tables exist */ ++ if (!(dfa->tables[YYTD_ID_ACCEPT - 1] && ++ dfa->tables[YYTD_ID_ACCEPT2 - 1] && ++ dfa->tables[YYTD_ID_DEF - 1] && ++ dfa->tables[YYTD_ID_BASE - 1] && ++ dfa->tables[YYTD_ID_NXT - 1] && ++ dfa->tables[YYTD_ID_CHK - 1])) ++ goto out; ++ ++ /* accept.size == default.size == base.size */ ++ state_count = dfa->tables[YYTD_ID_BASE - 1]->td_lolen; ++ if (!(state_count == dfa->tables[YYTD_ID_DEF - 1]->td_lolen && ++ state_count == dfa->tables[YYTD_ID_ACCEPT - 1]->td_lolen && ++ state_count == dfa->tables[YYTD_ID_ACCEPT2 - 1]->td_lolen)) ++ goto out; ++ ++ /* next.size == chk.size */ ++ trans_count = dfa->tables[YYTD_ID_NXT - 1]->td_lolen; ++ if (trans_count != dfa->tables[YYTD_ID_CHK - 1]->td_lolen) ++ goto out; ++ ++ /* if equivalence classes then its table size must be 256 */ ++ if (dfa->tables[YYTD_ID_EC - 1] && ++ dfa->tables[YYTD_ID_EC - 1]->td_lolen != 256) ++ goto out; ++ ++ for (i = 0; i < state_count; i++) { ++ if (DEFAULT_TABLE(dfa)[i] >= state_count) ++ goto out; ++ if (BASE_TABLE(dfa)[i] >= trans_count + 256) ++ goto out; ++ } ++ ++ 
for (i = 0; i < trans_count ; i++) { ++ if (NEXT_TABLE(dfa)[i] >= state_count) ++ goto out; ++ if (CHECK_TABLE(dfa)[i] >= state_count) ++ goto out; ++ } ++ ++ /* verify accept permissions */ ++ for (i = 0; i < state_count; i++) { ++ int mode = ACCEPT_TABLE(dfa)[i]; ++ ++ if (mode & ~AA_VALID_PERM_MASK) ++ goto out; ++ if (ACCEPT_TABLE2(dfa)[i] & ~AA_VALID_PERM2_MASK) ++ goto out; ++ ++ /* if any exec modifier is set MAY_EXEC must be set */ ++ if ((mode & AA_USER_EXEC_TYPE) && !(mode & AA_USER_EXEC)) ++ goto out; ++ if ((mode & AA_OTHER_EXEC_TYPE) && !(mode & AA_OTHER_EXEC)) ++ goto out; ++ } ++ ++ error = 0; ++out: ++ return error; ++} ++ ++struct aa_dfa *aa_match_alloc(void) ++{ ++ return kzalloc(sizeof(struct aa_dfa), GFP_KERNEL); ++} ++ ++void aa_match_free(struct aa_dfa *dfa) ++{ ++ if (dfa) { ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(dfa->tables); i++) ++ kfree(dfa->tables[i]); ++ } ++ kfree(dfa); ++} ++ ++/** ++ * aa_dfa_next_state_len - traverse @dfa to find state @str stops at ++ * @dfa: the dfa to match @str against ++ * @start: the state of the dfa to start matching in ++ * @str: the string of bytes to match against the dfa ++ * @len: length of the string of bytes to match ++ * ++ * aa_dfa_next_state will match @str against the dfa and return the state it ++ * finished matching in. The final state can be used to look up the accepting ++ * label, or as the start state of a continuing match. ++ * ++ * aa_dfa_next_state could be implement using this function by doing ++ * return aa_dfa_next_state_len(dfa, start, str, strlen(str)); ++ * but that would require traversing the string twice and be slightly ++ * slower. 
++ */ ++unsigned int aa_dfa_next_state_len(struct aa_dfa *dfa, unsigned int start, ++ const char *str, int len) ++{ ++ u16 *def = DEFAULT_TABLE(dfa); ++ u32 *base = BASE_TABLE(dfa); ++ u16 *next = NEXT_TABLE(dfa); ++ u16 *check = CHECK_TABLE(dfa); ++ unsigned int state = start, pos; ++ ++ if (state == 0) ++ return 0; ++ ++ /* current state is , matching character *str */ ++ if (dfa->tables[YYTD_ID_EC - 1]) { ++ u8 *equiv = EQUIV_TABLE(dfa); ++ for (; len; len--) { ++ pos = base[state] + equiv[(u8)*str++]; ++ if (check[pos] == state) ++ state = next[pos]; ++ else ++ state = def[state]; ++ } ++ } else { ++ for (; len; len--) { ++ pos = base[state] + (u8)*str++; ++ if (check[pos] == state) ++ state = next[pos]; ++ else ++ state = def[state]; ++ } ++ } ++ return state; ++} ++ ++/** ++ * aa_dfa_next_state - traverse @dfa to find state @str stops at ++ * @dfa: the dfa to match @str against ++ * @start: the state of the dfa to start matching in ++ * @str: the null terminated string of bytes to match against the dfa ++ * ++ * aa_dfa_next_state will match @str against the dfa and return the state it ++ * finished matching in. The final state can be used to look up the accepting ++ * label, or as the start state of a continuing match. 
++ */ ++unsigned int aa_dfa_next_state(struct aa_dfa *dfa, unsigned int start, ++ const char *str) ++{ ++ u16 *def = DEFAULT_TABLE(dfa); ++ u32 *base = BASE_TABLE(dfa); ++ u16 *next = NEXT_TABLE(dfa); ++ u16 *check = CHECK_TABLE(dfa); ++ unsigned int state = start, pos; ++ ++ if (state == 0) ++ return 0; ++ ++ /* current state is , matching character *str */ ++ if (dfa->tables[YYTD_ID_EC - 1]) { ++ u8 *equiv = EQUIV_TABLE(dfa); ++ while (*str) { ++ pos = base[state] + equiv[(u8)*str++]; ++ if (check[pos] == state) ++ state = next[pos]; ++ else ++ state = def[state]; ++ } ++ } else { ++ while (*str) { ++ pos = base[state] + (u8)*str++; ++ if (check[pos] == state) ++ state = next[pos]; ++ else ++ state = def[state]; ++ } ++ } ++ return state; ++} ++ ++/** ++ * aa_dfa_null_transition - step to next state after null character ++ * @dfa: the dfa to match against ++ * @start: the state of the dfa to start matching in ++ * ++ * aa_dfa_null_transition transitions to the next state after a null ++ * character which is not used in standard matching and is only ++ * used to seperate pairs. ++ */ ++unsigned int aa_dfa_null_transition(struct aa_dfa *dfa, unsigned int start) ++{ ++ return aa_dfa_next_state_len(dfa, start, "", 1); ++} ++ ++/** ++ * aa_dfa_match - find accept perm for @str in @dfa ++ * @dfa: the dfa to match @str against ++ * @str: the string to match against the dfa ++ * @audit_mask: the audit_mask for the final state ++ * ++ * aa_dfa_match will match @str and return the accept perms for the ++ * final state. 
++ */ ++unsigned int aa_dfa_match(struct aa_dfa *dfa, const char *str, int *audit_mask) ++{ ++ int state = aa_dfa_next_state(dfa, DFA_START, str); ++ if (audit_mask) ++ *audit_mask = dfa_audit_mask(dfa, state); ++ return ACCEPT_TABLE(dfa)[state]; ++} ++ ++/** ++ * aa_match_state - find accept perm and state for @str in @dfa ++ * @dfa: the dfa to match @str against ++ * @start: the state to start the match from ++ * @str: the string to match against the dfa ++ * @final: the state that the match finished in ++ * ++ * aa_match_state will match @str and return the accept perms, and @final ++ * state, the match occured in. ++ */ ++unsigned int aa_match_state(struct aa_dfa *dfa, unsigned int start, ++ const char *str, unsigned int *final) ++{ ++ unsigned int state; ++ if (dfa) { ++ state = aa_dfa_next_state(dfa, start, str); ++ if (final) ++ *final = state; ++ return ACCEPT_TABLE(dfa)[state]; ++ } ++ if (final) ++ *final = 0; ++ return 0; ++} ++ +--- /dev/null ++++ b/security/apparmor/match.h +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (C) 2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. ++ * ++ * AppArmor submodule (match) prototypes ++ */ ++ ++#ifndef __MATCH_H ++#define __MATCH_H ++ ++#define DFA_START 1 ++ ++/** ++ * The format used for transition tables is based on the GNU flex table ++ * file format (--tables-file option; see Table File Format in the flex ++ * info pages and the flex sources for documentation). The magic number ++ * used in the header is 0x1B5E783D insted of 0xF13C57B1 though, because ++ * the YY_ID_CHK (check) and YY_ID_DEF (default) tables are used ++ * slightly differently (see the apparmor-parser package). 
++ */ ++ ++#define YYTH_MAGIC 0x1B5E783D ++ ++struct table_set_header { ++ u32 th_magic; /* YYTH_MAGIC */ ++ u32 th_hsize; ++ u32 th_ssize; ++ u16 th_flags; ++ char th_version[]; ++}; ++ ++#define YYTD_ID_ACCEPT 1 ++#define YYTD_ID_BASE 2 ++#define YYTD_ID_CHK 3 ++#define YYTD_ID_DEF 4 ++#define YYTD_ID_EC 5 ++#define YYTD_ID_META 6 ++#define YYTD_ID_ACCEPT2 7 ++#define YYTD_ID_NXT 8 ++ ++ ++#define YYTD_DATA8 1 ++#define YYTD_DATA16 2 ++#define YYTD_DATA32 4 ++ ++struct table_header { ++ u16 td_id; ++ u16 td_flags; ++ u32 td_hilen; ++ u32 td_lolen; ++ char td_data[]; ++}; ++ ++#define DEFAULT_TABLE(DFA) ((u16 *)((DFA)->tables[YYTD_ID_DEF - 1]->td_data)) ++#define BASE_TABLE(DFA) ((u32 *)((DFA)->tables[YYTD_ID_BASE - 1]->td_data)) ++#define NEXT_TABLE(DFA) ((u16 *)((DFA)->tables[YYTD_ID_NXT - 1]->td_data)) ++#define CHECK_TABLE(DFA) ((u16 *)((DFA)->tables[YYTD_ID_CHK - 1]->td_data)) ++#define EQUIV_TABLE(DFA) ((u8 *)((DFA)->tables[YYTD_ID_EC - 1]->td_data)) ++#define ACCEPT_TABLE(DFA) ((u32 *)((DFA)->tables[YYTD_ID_ACCEPT - 1]->td_data)) ++#define ACCEPT_TABLE2(DFA) ((u32 *)((DFA)->tables[YYTD_ID_ACCEPT2 -1]->td_data)) ++ ++struct aa_dfa { ++ struct table_header *tables[YYTD_ID_NXT]; ++}; ++ ++#define byte_to_byte(X) (X) ++ ++#define UNPACK_ARRAY(TABLE, BLOB, LEN, TYPE, NTOHX) \ ++ do { \ ++ typeof(LEN) __i; \ ++ TYPE *__t = (TYPE *) TABLE; \ ++ TYPE *__b = (TYPE *) BLOB; \ ++ for (__i = 0; __i < LEN; __i++) { \ ++ __t[__i] = NTOHX(__b[__i]); \ ++ } \ ++ } while (0) ++ ++static inline size_t table_size(size_t len, size_t el_size) ++{ ++ return ALIGN(sizeof(struct table_header) + len * el_size, 8); ++} ++ ++#endif /* __MATCH_H */ +--- /dev/null ++++ b/security/apparmor/module_interface.c +@@ -0,0 +1,875 @@ ++/* ++ * Copyright (C) 1998-2007 Novell/SUSE ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ * ++ * AppArmor userspace policy interface ++ */ ++ ++#include ++ ++#include "apparmor.h" ++#include "inline.h" ++ ++/* ++ * This mutex is used to synchronize profile adds, replacements, and ++ * removals: we only allow one of these operations at a time. ++ * We do not use the profile list lock here in order to avoid blocking ++ * exec during those operations. (Exec involves a profile list lookup ++ * for named-profile transitions.) ++ */ ++DEFINE_MUTEX(aa_interface_lock); ++ ++/* ++ * The AppArmor interface treats data as a type byte followed by the ++ * actual data. The interface has the notion of a a named entry ++ * which has a name (AA_NAME typecode followed by name string) followed by ++ * the entries typecode and data. Named types allow for optional ++ * elements and extensions to be added and tested for without breaking ++ * backwards compatability. ++ */ ++ ++enum aa_code { ++ AA_U8, ++ AA_U16, ++ AA_U32, ++ AA_U64, ++ AA_NAME, /* same as string except it is items name */ ++ AA_STRING, ++ AA_BLOB, ++ AA_STRUCT, ++ AA_STRUCTEND, ++ AA_LIST, ++ AA_LISTEND, ++ AA_ARRAY, ++ AA_ARRAYEND, ++}; ++ ++/* ++ * aa_ext is the read of the buffer containing the serialized profile. The ++ * data is copied into a kernel buffer in apparmorfs and then handed off to ++ * the unpack routines. ++ */ ++struct aa_ext { ++ void *start; ++ void *end; ++ void *pos; /* pointer to current position in the buffer */ ++ u32 version; ++ char *ns_name; ++}; ++ ++static inline int aa_inbounds(struct aa_ext *e, size_t size) ++{ ++ return (size <= e->end - e->pos); ++} ++ ++/** ++ * aa_u16_chunck - test and do bounds checking for a u16 size based chunk ++ * @e: serialized data read head ++ * @chunk: start address for chunk of data ++ * ++ * return the size of chunk found with the read head at the end of ++ * the chunk. 
++ */ ++static size_t aa_is_u16_chunk(struct aa_ext *e, char **chunk) ++{ ++ void *pos = e->pos; ++ size_t size = 0; ++ ++ if (!aa_inbounds(e, sizeof(u16))) ++ goto fail; ++ size = le16_to_cpu(get_unaligned((u16 *)e->pos)); ++ e->pos += sizeof(u16); ++ if (!aa_inbounds(e, size)) ++ goto fail; ++ *chunk = e->pos; ++ e->pos += size; ++ return size; ++ ++fail: ++ e->pos = pos; ++ return 0; ++} ++ ++static inline int aa_is_X(struct aa_ext *e, enum aa_code code) ++{ ++ if (!aa_inbounds(e, 1)) ++ return 0; ++ if (*(u8 *) e->pos != code) ++ return 0; ++ e->pos++; ++ return 1; ++} ++ ++/** ++ * aa_is_nameX - check is the next element is of type X with a name of @name ++ * @e: serialized data extent information ++ * @code: type code ++ * @name: name to match to the serialized element. ++ * ++ * check that the next serialized data element is of type X and has a tag ++ * name @name. If @name is specified then there must be a matching ++ * name element in the stream. If @name is NULL any name element will be ++ * skipped and only the typecode will be tested. ++ * returns 1 on success (both type code and name tests match) and the read ++ * head is advanced past the headers ++ * returns %0 if either match failes, the read head does not move ++ */ ++static int aa_is_nameX(struct aa_ext *e, enum aa_code code, const char *name) ++{ ++ void *pos = e->pos; ++ /* ++ * Check for presence of a tagname, and if present name size ++ * AA_NAME tag value is a u16. ++ */ ++ if (aa_is_X(e, AA_NAME)) { ++ char *tag; ++ size_t size = aa_is_u16_chunk(e, &tag); ++ /* if a name is specified it must match. 
otherwise skip tag */ ++ if (name && (!size || strcmp(name, tag))) ++ goto fail; ++ } else if (name) { ++ /* if a name is specified and there is no name tag fail */ ++ goto fail; ++ } ++ ++ /* now check if type code matches */ ++ if (aa_is_X(e, code)) ++ return 1; ++ ++fail: ++ e->pos = pos; ++ return 0; ++} ++ ++static int aa_is_u16(struct aa_ext *e, u16 *data, const char *name) ++{ ++ void *pos = e->pos; ++ if (aa_is_nameX(e, AA_U16, name)) { ++ if (!aa_inbounds(e, sizeof(u16))) ++ goto fail; ++ if (data) ++ *data = le16_to_cpu(get_unaligned((u16 *)e->pos)); ++ e->pos += sizeof(u16); ++ return 1; ++ } ++fail: ++ e->pos = pos; ++ return 0; ++} ++ ++static int aa_is_u32(struct aa_ext *e, u32 *data, const char *name) ++{ ++ void *pos = e->pos; ++ if (aa_is_nameX(e, AA_U32, name)) { ++ if (!aa_inbounds(e, sizeof(u32))) ++ goto fail; ++ if (data) ++ *data = le32_to_cpu(get_unaligned((u32 *)e->pos)); ++ e->pos += sizeof(u32); ++ return 1; ++ } ++fail: ++ e->pos = pos; ++ return 0; ++} ++ ++static size_t aa_is_array(struct aa_ext *e, const char *name) ++{ ++ void *pos = e->pos; ++ if (aa_is_nameX(e, AA_ARRAY, name)) { ++ int size; ++ if (!aa_inbounds(e, sizeof(u16))) ++ goto fail; ++ size = (int) le16_to_cpu(get_unaligned((u16 *)e->pos)); ++ e->pos += sizeof(u16); ++ return size; ++ } ++fail: ++ e->pos = pos; ++ return 0; ++} ++ ++static size_t aa_is_blob(struct aa_ext *e, char **blob, const char *name) ++{ ++ void *pos = e->pos; ++ if (aa_is_nameX(e, AA_BLOB, name)) { ++ u32 size; ++ if (!aa_inbounds(e, sizeof(u32))) ++ goto fail; ++ size = le32_to_cpu(get_unaligned((u32 *)e->pos)); ++ e->pos += sizeof(u32); ++ if (aa_inbounds(e, (size_t) size)) { ++ * blob = e->pos; ++ e->pos += size; ++ return size; ++ } ++ } ++fail: ++ e->pos = pos; ++ return 0; ++} ++ ++static int aa_is_dynstring(struct aa_ext *e, char **string, const char *name) ++{ ++ char *src_str; ++ size_t size = 0; ++ void *pos = e->pos; ++ *string = NULL; ++ if (aa_is_nameX(e, AA_STRING, name) && ++ (size = 
aa_is_u16_chunk(e, &src_str))) { ++ char *str; ++ if (!(str = kmalloc(size, GFP_KERNEL))) ++ goto fail; ++ memcpy(str, src_str, size); ++ *string = str; ++ } ++ ++ return size; ++ ++fail: ++ e->pos = pos; ++ return 0; ++} ++ ++/** ++ * aa_unpack_dfa - unpack a file rule dfa ++ * @e: serialized data extent information ++ * ++ * returns dfa or ERR_PTR ++ */ ++static struct aa_dfa *aa_unpack_dfa(struct aa_ext *e) ++{ ++ char *blob = NULL; ++ size_t size, error = 0; ++ struct aa_dfa *dfa = NULL; ++ ++ size = aa_is_blob(e, &blob, "aadfa"); ++ if (size) { ++ dfa = aa_match_alloc(); ++ if (dfa) { ++ /* ++ * The dfa is aligned with in the blob to 8 bytes ++ * from the beginning of the stream. ++ */ ++ size_t sz = blob - (char *) e->start; ++ size_t pad = ALIGN(sz, 8) - sz; ++ error = unpack_dfa(dfa, blob + pad, size - pad); ++ if (!error) ++ error = verify_dfa(dfa); ++ } else { ++ error = -ENOMEM; ++ } ++ ++ if (error) { ++ aa_match_free(dfa); ++ dfa = ERR_PTR(error); ++ } ++ } ++ ++ return dfa; ++} ++ ++static int aa_unpack_exec_table(struct aa_ext *e, struct aa_profile *profile) ++{ ++ void *pos = e->pos; ++ ++ /* exec table is optional */ ++ if (aa_is_nameX(e, AA_STRUCT, "xtable")) { ++ int i, size; ++ ++ size = aa_is_array(e, NULL); ++ /* currently 4 exec bits and entries 0-3 are reserved iupcx */ ++ if (size > 16 - 4) ++ goto fail; ++ profile->exec_table = kzalloc(sizeof(char *) * size, ++ GFP_KERNEL); ++ if (!profile->exec_table) ++ goto fail; ++ ++ for (i = 0; i < size; i++) { ++ char *tmp; ++ if (!aa_is_dynstring(e, &tmp, NULL)) ++ goto fail; ++ /* note: strings beginning with a : have an embedded ++ \0 seperating the profile ns name from the profile ++ name */ ++ profile->exec_table[i] = tmp; ++ } ++ if (!aa_is_nameX(e, AA_ARRAYEND, NULL)) ++ goto fail; ++ if (!aa_is_nameX(e, AA_STRUCTEND, NULL)) ++ goto fail; ++ profile->exec_table_size = size; ++ } ++ return 1; ++ ++fail: ++ e->pos = pos; ++ return 0; ++} ++ ++/** ++ * aa_unpack_profile - unpack a serialized 
profile ++ * @e: serialized data extent information ++ * @sa: audit struct for the operation ++ */ ++static struct aa_profile *aa_unpack_profile(struct aa_ext *e, ++ struct aa_audit *sa) ++{ ++ struct aa_profile *profile = NULL; ++ ++ int error = -EPROTO; ++ ++ profile = alloc_aa_profile(); ++ if (!profile) ++ return ERR_PTR(-ENOMEM); ++ ++ /* check that we have the right struct being passed */ ++ if (!aa_is_nameX(e, AA_STRUCT, "profile")) ++ goto fail; ++ if (!aa_is_dynstring(e, &profile->name, NULL)) ++ goto fail; ++ ++ /* per profile debug flags (complain, audit) */ ++ if (!aa_is_nameX(e, AA_STRUCT, "flags")) ++ goto fail; ++ if (!aa_is_u32(e, &(profile->flags.hat), NULL)) ++ goto fail; ++ if (!aa_is_u32(e, &(profile->flags.complain), NULL)) ++ goto fail; ++ if (!aa_is_u32(e, &(profile->flags.audit), NULL)) ++ goto fail; ++ if (!aa_is_nameX(e, AA_STRUCTEND, NULL)) ++ goto fail; ++ ++ if (!aa_is_u32(e, &(profile->capabilities), NULL)) ++ goto fail; ++ if (!aa_is_u32(e, &(profile->audit_caps), NULL)) ++ goto fail; ++ if (!aa_is_u32(e, &(profile->quiet_caps), NULL)) ++ goto fail; ++ if (!aa_is_u32(e, &(profile->set_caps), NULL)) ++ goto fail; ++ ++ /* get file rules */ ++ profile->file_rules = aa_unpack_dfa(e); ++ if (IS_ERR(profile->file_rules)) { ++ error = PTR_ERR(profile->file_rules); ++ profile->file_rules = NULL; ++ goto fail; ++ } ++ ++ if (!aa_unpack_exec_table(e, profile)) ++ goto fail; ++ ++ if (!aa_is_nameX(e, AA_STRUCTEND, NULL)) ++ goto fail; ++ ++ return profile; ++ ++fail: ++ sa->name = profile && profile->name ? 
profile->name : "unknown"; ++ if (!sa->info) ++ sa->info = "failed to unpack profile"; ++ aa_audit_status(NULL, sa); ++ ++ if (profile) ++ free_aa_profile(profile); ++ ++ return ERR_PTR(error); ++} ++ ++/** ++ * aa_verify_head - unpack serialized stream header ++ * @e: serialized data read head ++ * @operation: operation header is being verified for ++ * ++ * returns error or 0 if header is good ++ */ ++static int aa_verify_header(struct aa_ext *e, struct aa_audit *sa) ++{ ++ /* get the interface version */ ++ if (!aa_is_u32(e, &e->version, "version")) { ++ sa->info = "invalid profile format"; ++ aa_audit_status(NULL, sa); ++ return -EPROTONOSUPPORT; ++ } ++ ++ /* check that the interface version is currently supported */ ++ if (e->version != 5) { ++ sa->info = "unsupported interface version"; ++ aa_audit_status(NULL, sa); ++ return -EPROTONOSUPPORT; ++ } ++ ++ /* read the namespace if present */ ++ if (!aa_is_dynstring(e, &e->ns_name, "namespace")) { ++ e->ns_name = NULL; ++ } ++ ++ return 0; ++} ++ ++/** ++ * aa_add_profile - Unpack and add a new profile to the profile list ++ * @data: serialized data stream ++ * @size: size of the serialized data stream ++ */ ++ssize_t aa_add_profile(void *data, size_t size) ++{ ++ struct aa_profile *profile = NULL; ++ struct aa_namespace *ns = NULL; ++ struct aa_ext e = { ++ .start = data, ++ .end = data + size, ++ .pos = data, ++ .ns_name = NULL ++ }; ++ ssize_t error; ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "profile_load"; ++ sa.gfp_mask = GFP_KERNEL; ++ ++ error = aa_verify_header(&e, &sa); ++ if (error) ++ return error; ++ ++ profile = aa_unpack_profile(&e, &sa); ++ if (IS_ERR(profile)) ++ return PTR_ERR(profile); ++ ++ mutex_lock(&aa_interface_lock); ++ write_lock(&profile_ns_list_lock); ++ if (e.ns_name) ++ ns = __aa_find_namespace(e.ns_name, &profile_ns_list); ++ else ++ ns = default_namespace; ++ if (!ns) { ++ struct aa_namespace *new_ns; ++ write_unlock(&profile_ns_list_lock); ++ new_ns 
= alloc_aa_namespace(e.ns_name); ++ if (!new_ns) { ++ mutex_unlock(&aa_interface_lock); ++ return -ENOMEM; ++ } ++ write_lock(&profile_ns_list_lock); ++ ns = __aa_find_namespace(e.ns_name, &profile_ns_list); ++ if (!ns) { ++ list_add(&new_ns->list, &profile_ns_list); ++ ns = new_ns; ++ } else ++ free_aa_namespace(new_ns); ++ } ++ ++ write_lock(&ns->lock); ++ if (__aa_find_profile(profile->name, &ns->profiles)) { ++ /* A profile with this name exists already. */ ++ write_unlock(&ns->lock); ++ write_unlock(&profile_ns_list_lock); ++ sa.name = profile->name; ++ sa.name2 = ns->name; ++ sa.info = "failed: profile already loaded"; ++ aa_audit_status(NULL, &sa); ++ mutex_unlock(&aa_interface_lock); ++ aa_put_profile(profile); ++ return -EEXIST; ++ } ++ profile->ns = aa_get_namespace(ns); ++ ns->profile_count++; ++ list_add(&profile->list, &ns->profiles); ++ write_unlock(&ns->lock); ++ write_unlock(&profile_ns_list_lock); ++ ++ sa.name = profile->name; ++ sa.name2 = ns->name; ++ aa_audit_status(NULL, &sa); ++ mutex_unlock(&aa_interface_lock); ++ return size; ++} ++ ++/** ++ * task_replace - replace a task's profile ++ * @task: task to replace profile on ++ * @new_cxt: new aa_task_context to do replacement with ++ * @new_profile: new profile ++ */ ++static inline void task_replace(struct task_struct *task, ++ struct aa_task_context *new_cxt, ++ struct aa_profile *new_profile) ++{ ++ struct aa_task_context *cxt = aa_task_context(task); ++ ++ AA_DEBUG("%s: replacing profile for task %d " ++ "profile=%s (%p)\n", ++ __FUNCTION__, ++ cxt->task->pid, ++ cxt->profile->name, cxt->profile); ++ ++ aa_change_task_context(task, new_cxt, new_profile, cxt->cookie, ++ cxt->previous_profile); ++} ++ ++/** ++ * aa_replace_profile - replace a profile on the profile list ++ * @udata: serialized data stream ++ * @size: size of the serialized data stream ++ * ++ * unpack and replace a profile on the profile list and uses of that profile ++ * by any aa_task_context. 
If the profile does not exist on the profile list ++ * it is added. Return %0 or error. ++ */ ++ssize_t aa_replace_profile(void *udata, size_t size) ++{ ++ struct aa_profile *old_profile, *new_profile; ++ struct aa_namespace *ns; ++ struct aa_task_context *new_cxt; ++ struct aa_ext e = { ++ .start = udata, ++ .end = udata + size, ++ .pos = udata, ++ .ns_name = NULL ++ }; ++ ssize_t error; ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "profile_replace"; ++ sa.gfp_mask = GFP_KERNEL; ++ ++ error = aa_verify_header(&e, &sa); ++ if (error) ++ return error; ++ ++ new_profile = aa_unpack_profile(&e, &sa); ++ if (IS_ERR(new_profile)) ++ return PTR_ERR(new_profile); ++ ++ mutex_lock(&aa_interface_lock); ++ write_lock(&profile_ns_list_lock); ++ if (e.ns_name) ++ ns = __aa_find_namespace(e.ns_name, &profile_ns_list); ++ else ++ ns = default_namespace; ++ if (!ns) { ++ struct aa_namespace *new_ns; ++ write_unlock(&profile_ns_list_lock); ++ new_ns = alloc_aa_namespace(e.ns_name); ++ if (!new_ns) { ++ mutex_unlock(&aa_interface_lock); ++ return -ENOMEM; ++ } ++ write_lock(&profile_ns_list_lock); ++ ns = __aa_find_namespace(e.ns_name, &profile_ns_list); ++ if (!ns) { ++ list_add(&new_ns->list, &profile_ns_list); ++ ns = new_ns; ++ } else ++ free_aa_namespace(new_ns); ++ } ++ ++ write_lock(&ns->lock); ++ old_profile = __aa_find_profile(new_profile->name, &ns->profiles); ++ if (old_profile) { ++ lock_profile(old_profile); ++ old_profile->isstale = 1; ++ list_del_init(&old_profile->list); ++ unlock_profile(old_profile); ++ ns->profile_count--; ++ } ++ new_profile->ns = aa_get_namespace(ns); ++ ns->profile_count++; ++ /* not don't need an extra ref count to keep new_profile as ++ * it is protect by the interface mutex */ ++ list_add(&new_profile->list, &ns->profiles); ++ write_unlock(&ns->lock); ++ write_unlock(&profile_ns_list_lock); ++ ++ if (!old_profile) { ++ sa.operation = "profile_load"; ++ goto out; ++ } ++ /* ++ * Replacement needs to allocate a new 
aa_task_context for each ++ * task confined by old_profile. To do this the profile locks ++ * are only held when the actual switch is done per task. While ++ * looping to allocate a new aa_task_context the old_task list ++ * may get shorter if tasks exit/change their profile but will ++ * not get longer as new task will not use old_profile detecting ++ * that is stale. ++ */ ++ do { ++ new_cxt = aa_alloc_task_context(GFP_KERNEL | __GFP_NOFAIL); ++ ++ lock_both_profiles(old_profile, new_profile); ++ if (!list_empty(&old_profile->task_contexts)) { ++ struct task_struct *task = ++ list_entry(old_profile->task_contexts.next, ++ struct aa_task_context, list)->task; ++ task_lock(task); ++ task_replace(task, new_cxt, new_profile); ++ task_unlock(task); ++ new_cxt = NULL; ++ } ++ unlock_both_profiles(old_profile, new_profile); ++ } while (!new_cxt); ++ aa_free_task_context(new_cxt); ++ aa_put_profile(old_profile); ++ ++out: ++ sa.name = new_profile->name; ++ sa.name2 = ns->name; ++ aa_audit_status(NULL, &sa); ++ mutex_unlock(&aa_interface_lock); ++ return size; ++} ++ ++/** ++ * aa_remove_profile - remove a profile from the system ++ * @name: name of the profile to remove ++ * @size: size of the name ++ * ++ * remove a profile from the profile list and all aa_task_context references ++ * to said profile. 
++ */ ++ssize_t aa_remove_profile(char *name, size_t size) ++{ ++ struct aa_namespace *ns; ++ struct aa_profile *profile; ++ struct aa_audit sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "profile_remove"; ++ sa.gfp_mask = GFP_KERNEL; ++ ++ mutex_lock(&aa_interface_lock); ++ write_lock(&profile_ns_list_lock); ++ ++ if (name[0] == ':') { ++ char *split = strchr(name + 1, ':'); ++ if (!split) ++ goto noent; ++ *split = 0; ++ ns = __aa_find_namespace(name + 1, &profile_ns_list); ++ name = split + 1; ++ } else { ++ ns = default_namespace; ++ } ++ ++ if (!ns) ++ goto noent; ++ sa.name2 = ns->name; ++ write_lock(&ns->lock); ++ profile = __aa_find_profile(name, &ns->profiles); ++ if (!profile) { ++ write_unlock(&ns->lock); ++ goto noent; ++ } ++ sa.name = profile->name; ++ ++ /* Remove the profile from each task context it is on. */ ++ lock_profile(profile); ++ profile->isstale = 1; ++ aa_unconfine_tasks(profile); ++ list_del_init(&profile->list); ++ ns->profile_count--; ++ unlock_profile(profile); ++ /* Release the profile itself. 
*/ ++ write_unlock(&ns->lock); ++ /* check to see if the namespace has become stale */ ++ if (ns != default_namespace && ns->profile_count == 0) { ++ list_del_init(&ns->list); ++ aa_put_namespace(ns); ++ } ++ write_unlock(&profile_ns_list_lock); ++ ++ aa_audit_status(NULL, &sa); ++ mutex_unlock(&aa_interface_lock); ++ aa_put_profile(profile); ++ ++ return size; ++ ++noent: ++ write_unlock(&profile_ns_list_lock); ++ sa.info = "failed: profile does not exist"; ++ aa_audit_status(NULL, &sa); ++ mutex_unlock(&aa_interface_lock); ++ return -ENOENT; ++} ++ ++/** ++ * free_aa_namespace_kref - free aa_namespace by kref (see aa_put_namespace) ++ * @kr: kref callback for freeing of a namespace ++ */ ++void free_aa_namespace_kref(struct kref *kref) ++{ ++ struct aa_namespace *ns=container_of(kref, struct aa_namespace, count); ++ ++ free_aa_namespace(ns); ++} ++ ++/** ++ * alloc_aa_namespace - allocate, initialize and return a new namespace ++ * @name: a preallocated name ++ * Returns NULL on failure. 
++ */ ++struct aa_namespace *alloc_aa_namespace(char *name) ++{ ++ struct aa_namespace *ns; ++ ++ ns = kzalloc(sizeof(*ns), GFP_KERNEL); ++ AA_DEBUG("%s(%p)\n", __FUNCTION__, ns); ++ if (ns) { ++ ns->name = name; ++ INIT_LIST_HEAD(&ns->list); ++ INIT_LIST_HEAD(&ns->profiles); ++ kref_init(&ns->count); ++ rwlock_init(&ns->lock); ++ ++ ns->null_complain_profile = alloc_aa_profile(); ++ if (!ns->null_complain_profile) { ++ if (!name) ++ kfree(ns->name); ++ kfree(ns); ++ return NULL; ++ } ++ ns->null_complain_profile->name = ++ kstrdup("null-complain-profile", GFP_KERNEL); ++ if (!ns->null_complain_profile->name) { ++ free_aa_profile(ns->null_complain_profile); ++ if (!name) ++ kfree(ns->name); ++ kfree(ns); ++ return NULL; ++ } ++ ns->null_complain_profile->flags.complain = 1; ++ /* null_complain_profile doesn't contribute to ns ref count */ ++ ns->null_complain_profile->ns = ns; ++ } ++ return ns; ++} ++ ++/** ++ * free_aa_namespace - free a profile namespace ++ * @namespace: the namespace to free ++ * ++ * Free a namespace. All references to the namespace must have been put. ++ * If the namespace was referenced by a profile confining a task, ++ * free_aa_namespace will be called indirectly (through free_aa_profile) ++ * from an rcu callback routine, so we must not sleep here. 
++ */ ++void free_aa_namespace(struct aa_namespace *ns) ++{ ++ AA_DEBUG("%s(%p)\n", __FUNCTION__, ns); ++ ++ if (!ns) ++ return; ++ ++ /* namespace still contains profiles -- invalid */ ++ if (!list_empty(&ns->profiles)) { ++ AA_ERROR("%s: internal error, " ++ "namespace '%s' still contains profiles\n", ++ __FUNCTION__, ++ ns->name); ++ BUG(); ++ } ++ if (!list_empty(&ns->list)) { ++ AA_ERROR("%s: internal error, " ++ "namespace '%s' still on list\n", ++ __FUNCTION__, ++ ns->name); ++ BUG(); ++ } ++ /* null_complain_profile doesn't contribute to ns ref counting */ ++ ns->null_complain_profile->ns = NULL; ++ aa_put_profile(ns->null_complain_profile); ++ kfree(ns->name); ++ kfree(ns); ++} ++ ++/** ++ * free_aa_profile_kref - free aa_profile by kref (called by aa_put_profile) ++ * @kr: kref callback for freeing of a profile ++ */ ++void free_aa_profile_kref(struct kref *kref) ++{ ++ struct aa_profile *p=container_of(kref, struct aa_profile, count); ++ ++ free_aa_profile(p); ++} ++ ++/** ++ * alloc_aa_profile - allocate, initialize and return a new profile ++ * Returns NULL on failure. ++ */ ++struct aa_profile *alloc_aa_profile(void) ++{ ++ struct aa_profile *profile; ++ ++ profile = kzalloc(sizeof(*profile), GFP_KERNEL); ++ AA_DEBUG("%s(%p)\n", __FUNCTION__, profile); ++ if (profile) { ++ INIT_LIST_HEAD(&profile->list); ++ kref_init(&profile->count); ++ INIT_LIST_HEAD(&profile->task_contexts); ++ spin_lock_init(&profile->lock); ++ } ++ return profile; ++} ++ ++/** ++ * free_aa_profile - free a profile ++ * @profile: the profile to free ++ * ++ * Free a profile, its hats and null_profile. All references to the profile, ++ * its hats and null_profile must have been put. ++ * ++ * If the profile was referenced from a task context, free_aa_profile() will ++ * be called from an rcu callback routine, so we must not sleep here. 
++ */ ++void free_aa_profile(struct aa_profile *profile) ++{ ++ AA_DEBUG("%s(%p)\n", __FUNCTION__, profile); ++ ++ if (!profile) ++ return; ++ ++ /* profile is still on profile namespace list -- invalid */ ++ if (!list_empty(&profile->list)) { ++ AA_ERROR("%s: internal error, " ++ "profile '%s' still on global list\n", ++ __FUNCTION__, ++ profile->name); ++ BUG(); ++ } ++ aa_put_namespace(profile->ns); ++ ++ aa_match_free(profile->file_rules); ++ ++ if (profile->name) { ++ AA_DEBUG("%s: %s\n", __FUNCTION__, profile->name); ++ kfree(profile->name); ++ } ++ ++ kfree(profile); ++} ++ ++/** ++ * aa_unconfine_tasks - remove tasks on a profile's task context list ++ * @profile: profile to remove tasks from ++ * ++ * Assumes that @profile lock is held. ++ */ ++void aa_unconfine_tasks(struct aa_profile *profile) ++{ ++ while (!list_empty(&profile->task_contexts)) { ++ struct task_struct *task = ++ list_entry(profile->task_contexts.next, ++ struct aa_task_context, list)->task; ++ task_lock(task); ++ aa_change_task_context(task, NULL, NULL, 0, NULL); ++ task_unlock(task); ++ } ++} diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-network.diff b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-network.diff new file mode 100644 index 0000000000..d9064599b6 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-network.diff @@ -0,0 +1,408 @@ +From: John Johansen +Subject: AppArmor: Simplified network controls for AppArmor + +Simple network control determining which network families a confined +application has access to. 
+ +Signed-off-by: John Johansen + +--- + security/apparmor/Makefile | 7 + + security/apparmor/apparmor.h | 9 ++ + security/apparmor/lsm.c | 129 ++++++++++++++++++++++++++++++++++- + security/apparmor/main.c | 107 ++++++++++++++++++++++++++++- + security/apparmor/module_interface.c | 26 ++++++- + 5 files changed, 271 insertions(+), 7 deletions(-) + +--- a/security/apparmor/Makefile ++++ b/security/apparmor/Makefile +@@ -8,6 +8,11 @@ apparmor-y := main.o list.o procattr.o l + quiet_cmd_make-caps = GEN $@ + cmd_make-caps = sed -n -e "/CAP_FS_MASK/d" -e "s/^\#define[ \\t]\\+CAP_\\([A-Z0-9_]\\+\\)[ \\t]\\+\\([0-9]\\+\\)\$$/[\\2] = \"\\1\",/p" $< | tr A-Z a-z > $@ + +-$(obj)/main.o : $(obj)/capability_names.h ++quiet_cmd_make-af = GEN $@ ++cmd_make-af = sed -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "s/^\#define[ \\t]\\+AF_\\([A-Z0-9_]\\+\\)[ \\t]\\+\\([0-9]\\+\\)\\(.*\\)\$$/[\\2] = \"\\1\",/p" $< | tr A-Z a-z > $@ ++ ++$(obj)/main.o : $(obj)/capability_names.h $(obj)/af_names.h + $(obj)/capability_names.h : $(srctree)/include/linux/capability.h + $(call cmd,make-caps) ++$(obj)/af_names.h : $(srctree)/include/linux/socket.h ++ $(call cmd,make-af) +--- a/security/apparmor/apparmor.h ++++ b/security/apparmor/apparmor.h +@@ -16,6 +16,8 @@ + #include + #include + #include ++#include ++#include + + /* + * We use MAY_READ, MAY_WRITE, MAY_EXEC, MAY_APPEND and the following flags +@@ -212,6 +214,9 @@ struct aa_profile { + struct list_head task_contexts; + spinlock_t lock; + unsigned long int_flags; ++ u16 network_families[AF_MAX]; ++ u16 audit_network[AF_MAX]; ++ u16 quiet_network[AF_MAX]; + }; + + extern struct list_head profile_ns_list; +@@ -258,6 +263,7 @@ struct aa_audit { + int request_mask, denied_mask, audit_mask; + struct iattr *iattr; + pid_t task, parent; ++ int family, type, protocol; + int error_code; + }; + +@@ -319,6 +325,9 @@ extern void aa_change_task_context(struc + struct aa_profile *previous_profile); + extern int aa_may_ptrace(struct aa_task_context *cxt, + struct 
aa_profile *tracee); ++extern int aa_net_perm(struct aa_profile *profile, char *operation, ++ int family, int type, int protocol); ++extern int aa_revalidate_sk(struct sock *sk, char *operation); + + /* lsm.c */ + extern int apparmor_initialized; +--- a/security/apparmor/lsm.c ++++ b/security/apparmor/lsm.c +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + #include "apparmor.h" + #include "inline.h" +@@ -680,6 +681,117 @@ static void apparmor_task_free_security( + aa_release(task); + } + ++static int apparmor_socket_create(int family, int type, int protocol, int kern) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ if (kern) ++ return 0; ++ ++ profile = aa_get_profile(current); ++ if (profile) ++ error = aa_net_perm(profile, "socket_create", family, ++ type, protocol); ++ aa_put_profile(profile); ++ ++ return error; ++} ++ ++static int apparmor_socket_post_create(struct socket *sock, int family, ++ int type, int protocol, int kern) ++{ ++ struct sock *sk = sock->sk; ++ ++ if (kern) ++ return 0; ++ ++ return aa_revalidate_sk(sk, "socket_post_create"); ++} ++ ++static int apparmor_socket_bind(struct socket *sock, ++ struct sockaddr *address, int addrlen) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_bind"); ++} ++ ++static int apparmor_socket_connect(struct socket *sock, ++ struct sockaddr *address, int addrlen) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_connect"); ++} ++ ++static int apparmor_socket_listen(struct socket *sock, int backlog) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_listen"); ++} ++ ++static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_accept"); ++} ++ ++static int apparmor_socket_sendmsg(struct socket *sock, ++ struct msghdr *msg, int size) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, 
"socket_sendmsg"); ++} ++ ++static int apparmor_socket_recvmsg(struct socket *sock, ++ struct msghdr *msg, int size, int flags) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_recvmsg"); ++} ++ ++static int apparmor_socket_getsockname(struct socket *sock) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_getsockname"); ++} ++ ++static int apparmor_socket_getpeername(struct socket *sock) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_getpeername"); ++} ++ ++static int apparmor_socket_getsockopt(struct socket *sock, int level, ++ int optname) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_getsockopt"); ++} ++ ++static int apparmor_socket_setsockopt(struct socket *sock, int level, ++ int optname) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_setsockopt"); ++} ++ ++static int apparmor_socket_shutdown(struct socket *sock, int how) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(sk, "socket_shutdown"); ++} ++ + static int apparmor_getprocattr(struct task_struct *task, char *name, + char **value) + { +@@ -780,9 +892,6 @@ struct security_operations apparmor_ops + .capable = apparmor_capable, + .syslog = cap_syslog, + +- .netlink_send = cap_netlink_send, +- .netlink_recv = cap_netlink_recv, +- + .bprm_apply_creds = cap_bprm_apply_creds, + .bprm_set_security = apparmor_bprm_set_security, + .bprm_secureexec = apparmor_bprm_secureexec, +@@ -820,6 +929,20 @@ struct security_operations apparmor_ops + + .getprocattr = apparmor_getprocattr, + .setprocattr = apparmor_setprocattr, ++ ++ .socket_create = apparmor_socket_create, ++ .socket_post_create = apparmor_socket_post_create, ++ .socket_bind = apparmor_socket_bind, ++ .socket_connect = apparmor_socket_connect, ++ .socket_listen = apparmor_socket_listen, ++ .socket_accept = apparmor_socket_accept, ++ .socket_sendmsg = apparmor_socket_sendmsg, ++ .socket_recvmsg = 
apparmor_socket_recvmsg, ++ .socket_getsockname = apparmor_socket_getsockname, ++ .socket_getpeername = apparmor_socket_getpeername, ++ .socket_getsockopt = apparmor_socket_getsockopt, ++ .socket_setsockopt = apparmor_socket_setsockopt, ++ .socket_shutdown = apparmor_socket_shutdown, + }; + + void info_message(const char *str) +--- a/security/apparmor/main.c ++++ b/security/apparmor/main.c +@@ -14,6 +14,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #include "apparmor.h" + +@@ -116,6 +119,24 @@ static void aa_audit_file_mask(struct au + audit_log_format(ab, " %s=\"%s::%s\"", name, user, other); + } + ++static const char *address_families[] = { ++#include "af_names.h" ++}; ++ ++static const char *sock_types[] = { ++ "unknown(0)", ++ "stream", ++ "dgram", ++ "raw", ++ "rdm", ++ "seqpacket", ++ "dccp", ++ "unknown(7)", ++ "unknown(8)", ++ "unknown(9)", ++ "packet", ++}; ++ + /** + * aa_audit - Log an audit event to the audit subsystem + * @profile: profile to check against +@@ -187,7 +208,25 @@ static int aa_audit_base(struct aa_profi + audit_log_untrustedstring(ab, sa->name2); + } + +- audit_log_format(ab, " pid=%d", current->pid); ++ if (sa->family || sa->type) { ++ if (address_families[sa->family]) ++ audit_log_format(ab, " family=\"%s\"", ++ address_families[sa->family]); ++ else ++ audit_log_format(ab, " family=\"unknown(%d)\"", ++ sa->family); ++ ++ if (sock_types[sa->type]) ++ audit_log_format(ab, " sock_type=\"%s\"", ++ sock_types[sa->type]); ++ else ++ audit_log_format(ab, " sock_type=\"unknown(%d)\"", ++ sa->type); ++ ++ audit_log_format(ab, " protocol=%d", sa->protocol); ++ } ++ ++ audit_log_format(ab, " pid=%d", current->pid); + + if (profile) { + audit_log_format(ab, " profile="); +@@ -767,6 +806,72 @@ int aa_link(struct aa_profile *profile, + + return error; + } ++ ++int aa_net_perm(struct aa_profile *profile, char *operation, ++ int family, int type, int protocol) ++{ ++ struct aa_audit sa; ++ int error = 0; ++ u16 
family_mask, audit_mask, quiet_mask; ++ ++ if ((family < 0) || (family >= AF_MAX)) ++ return -EINVAL; ++ ++ if ((type < 0) || (type >= SOCK_MAX)) ++ return -EINVAL; ++ ++ /* unix domain and netlink sockets are handled by ipc */ ++ if (family == AF_UNIX || family == AF_NETLINK) ++ return 0; ++ ++ family_mask = profile->network_families[family]; ++ audit_mask = profile->audit_network[family]; ++ quiet_mask = profile->quiet_network[family]; ++ ++ error = (family_mask & (1 << type)) ? 0 : -EACCES; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = operation; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.family = family; ++ sa.type = type; ++ sa.protocol = protocol; ++ sa.error_code = error; ++ ++ if (likely(!error)) { ++ if (!PROFILE_AUDIT(profile) && !(family_mask & audit_mask)) ++ return 0; ++ } else if (!((1 << type) & ~quiet_mask)) { ++ return error; ++ } ++ ++ error = aa_audit(profile, &sa); ++ ++ return error; ++} ++ ++int aa_revalidate_sk(struct sock *sk, char *operation) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ /* this is some debugging code to flush out the network hooks that ++ that are called in interrupt context */ ++ if (in_interrupt()) { ++ printk("AppArmor Debug: Hook being called from interrupt context\n"); ++ dump_stack(); ++ return 0; ++ } ++ ++ profile = aa_get_profile(current); ++ if (profile) ++ error = aa_net_perm(profile, operation, ++ sk->sk_family, sk->sk_type, ++ sk->sk_protocol); ++ aa_put_profile(profile); ++ ++ return error; ++} + + /******************************* + * Global task related functions +--- a/security/apparmor/module_interface.c ++++ b/security/apparmor/module_interface.c +@@ -321,8 +321,8 @@ static struct aa_profile *aa_unpack_prof + struct aa_audit *sa) + { + struct aa_profile *profile = NULL; +- +- int error = -EPROTO; ++ size_t size = 0; ++ int i, error = -EPROTO; + + profile = alloc_aa_profile(); + if (!profile) +@@ -355,6 +355,28 @@ static struct aa_profile *aa_unpack_prof + if (!aa_is_u32(e, &(profile->set_caps), 
NULL)) + goto fail; + ++ size = aa_is_array(e, "net_allowed_af"); ++ if (size) { ++ if (size > AF_MAX) ++ goto fail; ++ ++ for (i = 0; i < size; i++) { ++ if (!aa_is_u16(e, &profile->network_families[i], NULL)) ++ goto fail; ++ if (!aa_is_u16(e, &profile->audit_network[i], NULL)) ++ goto fail; ++ if (!aa_is_u16(e, &profile->quiet_network[i], NULL)) ++ goto fail; ++ } ++ if (!aa_is_nameX(e, AA_ARRAYEND, NULL)) ++ goto fail; ++ /* allow unix domain and netlink sockets they are handled ++ * by IPC ++ */ ++ } ++ profile->network_families[AF_UNIX] = 0xffff; ++ profile->network_families[AF_NETLINK] = 0xffff; ++ + /* get file rules */ + profile->file_rules = aa_unpack_dfa(e); + if (IS_ERR(profile->file_rules)) { diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-ptrace-2.6.27.diff b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-ptrace-2.6.27.diff new file mode 100644 index 0000000000..c80f740886 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-ptrace-2.6.27.diff @@ -0,0 +1,55 @@ +From: Jeff Mahoney +Subject: apparmor: use new ptrace security_operations + + This patch implements the new ptrace security_operations members. + + ->ptrace was changed to ->ptrace_may_access and ->ptrace_traceme. + + The apparmor versions are really just wrappers for the old function. 
+ +Signed-off-by: Jeff Mahoney + +--- + security/apparmor/lsm.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +--- a/security/apparmor/lsm.c ++++ b/security/apparmor/lsm.c +@@ -158,7 +158,7 @@ static int aa_reject_syscall(struct task + } + + static int apparmor_ptrace(struct task_struct *parent, +- struct task_struct *child, unsigned int mode) ++ struct task_struct *child) + { + struct aa_task_context *cxt; + int error = 0; +@@ -207,6 +207,18 @@ static int apparmor_ptrace(struct task_s + return error; + } + ++static int apparmor_ptrace_may_access(struct task_struct *child, ++ unsigned int mode) ++{ ++ return apparmor_ptrace(child->parent, child); ++} ++ ++ ++static int apparmor_ptrace_traceme(struct task_struct *parent) ++{ ++ return apparmor_ptrace(parent, current); ++} ++ + static int apparmor_capable(struct task_struct *task, int cap) + { + int error; +@@ -899,7 +911,8 @@ static int apparmor_task_setrlimit(unsig + } + + struct security_operations apparmor_ops = { +- .ptrace = apparmor_ptrace, ++ .ptrace_may_access = apparmor_ptrace_may_access, ++ .ptrace_traceme = apparmor_ptrace_traceme, + .capget = cap_capget, + .capset_check = cap_capset_check, + .capset_set = cap_capset_set, diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-rlimits.diff b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-rlimits.diff new file mode 100644 index 0000000000..f3912fddbe --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/apparmor-rlimits.diff @@ -0,0 +1,461 @@ +From: John Johansen +Subject: AppArmor: per profile controls for system rlimits + +Provide contol of rlimits on a per profile basis. Each profile provides +a per limit contol and corresponding hard limit value, such that when a +profile becomes attached to a task it sets the tasks limits to be <= to +the profiles specified limits. Note: the profile limit value will not +raise a tasks limit if it is already less than the profile mandates. 
+ +In addition to setting a tasks limits, the ability to set limits on +a confined task are controlled. AppArmor only controls the raising +of a tasks limits Tasks with CAP_SYS_RESOURCE can have their hard limits +raised up to the value specified by the profile. AppArmor does not +prevent a task for lowering its hard limits, nor does it provide +additional control on soft limits. + +AppArmor only controls the limits specified in a profile so that +any limit not specified is free to be modified subject to standard +linux limitations. + +--- + security/apparmor/apparmor.h | 23 ++++++ + security/apparmor/apparmorfs.c | 2 + security/apparmor/lsm.c | 16 ++++ + security/apparmor/main.c | 132 +++++++++++++++++++++++++++++++---- + security/apparmor/module_interface.c | 56 ++++++++++++++ + 5 files changed, 215 insertions(+), 14 deletions(-) + +--- a/security/apparmor/apparmor.h ++++ b/security/apparmor/apparmor.h +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -139,6 +140,18 @@ extern unsigned int apparmor_path_max; + + #define AA_ERROR(fmt, args...) printk(KERN_ERR "AppArmor: " fmt, ##args) + ++/* struct aa_rlimit - rlimits settings for the profile ++ * @mask: which hard limits to set ++ * @limits: rlimit values that override task limits ++ * ++ * AppArmor rlimits are used to set confined task rlimits. Only the ++ * limits specified in @mask will be controlled by apparmor. 
++ */ ++struct aa_rlimit { ++ unsigned int mask; ++ struct rlimit limits[RLIM_NLIMITS]; ++}; ++ + struct aa_profile; + + /* struct aa_namespace - namespace for a set of profiles +@@ -173,6 +186,8 @@ struct aa_namespace { + * @audit_caps: caps that are to be audited + * @quiet_caps: caps that should not be audited + * @capabilities: capabilities granted by the process ++ * @rlimits: rlimits for the profile ++ * @task_count: how many tasks the profile is attached to + * @count: reference count of the profile + * @task_contexts: list of tasks confined by profile + * @lock: lock for the task_contexts list +@@ -210,6 +225,9 @@ struct aa_profile { + kernel_cap_t audit_caps; + kernel_cap_t quiet_caps; + ++ struct aa_rlimit rlimits; ++ unsigned int task_count; ++ + struct kref count; + struct list_head task_contexts; + spinlock_t lock; +@@ -261,6 +279,7 @@ struct aa_audit { + const char *name2; + const char *name3; + int request_mask, denied_mask, audit_mask; ++ int rlimit; + struct iattr *iattr; + pid_t task, parent; + int family, type, protocol; +@@ -328,6 +347,10 @@ extern int aa_may_ptrace(struct aa_task_ + extern int aa_net_perm(struct aa_profile *profile, char *operation, + int family, int type, int protocol); + extern int aa_revalidate_sk(struct sock *sk, char *operation); ++extern int aa_task_setrlimit(struct aa_profile *profile, unsigned int resource, ++ struct rlimit *new_rlim); ++extern void aa_set_rlimits(struct task_struct *task, struct aa_profile *profile); ++ + + /* lsm.c */ + extern int apparmor_initialized; +--- a/security/apparmor/apparmorfs.c ++++ b/security/apparmor/apparmorfs.c +@@ -106,7 +106,7 @@ static ssize_t aa_features_read(struct f + { + const char *features = "file=3.0 capability=2.0 network=1.0 " + "change_hat=1.5 change_profile=1.0 " +- "aanamespaces=1.0"; ++ "aanamespaces=1.0 rlimit=1.0"; + + return simple_read_from_buffer(buf, size, ppos, features, + strlen(features)); +--- a/security/apparmor/lsm.c ++++ b/security/apparmor/lsm.c +@@ -883,6 
+883,21 @@ static int apparmor_setprocattr(struct t + return error; + } + ++static int apparmor_task_setrlimit(unsigned int resource, ++ struct rlimit *new_rlim) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ profile = aa_get_profile(current); ++ if (profile) { ++ error = aa_task_setrlimit(profile, resource, new_rlim); ++ } ++ aa_put_profile(profile); ++ ++ return error; ++} ++ + struct security_operations apparmor_ops = { + .ptrace = apparmor_ptrace, + .capget = cap_capget, +@@ -926,6 +941,7 @@ struct security_operations apparmor_ops + .task_free_security = apparmor_task_free_security, + .task_post_setuid = cap_task_post_setuid, + .task_reparent_to_init = cap_task_reparent_to_init, ++ .task_setrlimit = apparmor_task_setrlimit, + + .getprocattr = apparmor_getprocattr, + .setprocattr = apparmor_setprocattr, +--- a/security/apparmor/main.c ++++ b/security/apparmor/main.c +@@ -177,6 +177,9 @@ static int aa_audit_base(struct aa_profi + if (sa->request_mask) + audit_log_format(ab, " fsuid=%d", current->fsuid); + ++ if (sa->rlimit) ++ audit_log_format(ab, " rlimit=%d", sa->rlimit - 1); ++ + if (sa->iattr) { + struct iattr *iattr = sa->iattr; + +@@ -872,6 +875,79 @@ int aa_revalidate_sk(struct sock *sk, ch + + return error; + } ++/** ++ * aa_task_setrlimit - test permission to set an rlimit ++ * @profile - profile confining the task ++ * @resource - the resource being set ++ * @new_rlim - the new resource limit ++ * ++ * Control raising the processes hard limit. 
++ */ ++int aa_task_setrlimit(struct aa_profile *profile, unsigned int resource, ++ struct rlimit *new_rlim) ++{ ++ struct aa_audit sa; ++ int error = 0; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "setrlimit"; ++ sa.gfp_mask = GFP_KERNEL; ++ sa.rlimit = resource + 1; ++ ++ if (profile->rlimits.mask & (1 << resource) && ++ new_rlim->rlim_max > profile->rlimits.limits[resource].rlim_max) { ++ sa.error_code = -EACCES; ++ ++ error = aa_audit(profile, &sa); ++ } ++ ++ return error; ++} ++ ++static int aa_rlimit_nproc(struct aa_profile *profile) { ++ if (profile && (profile->rlimits.mask & (1 << RLIMIT_NPROC)) && ++ profile->task_count >= profile->rlimits.limits[RLIMIT_NPROC].rlim_max) ++ return -EAGAIN; ++ return 0; ++} ++ ++void aa_set_rlimits(struct task_struct *task, struct aa_profile *profile) ++{ ++ int i, mask; ++ ++ if (!profile) ++ return; ++ ++ if (!profile->rlimits.mask) ++ return; ++ ++ task_lock(task->group_leader); ++ mask = 1; ++ for (i = 0; i < RLIM_NLIMITS; i++, mask <<= 1) { ++ struct rlimit new_rlim, *old_rlim; ++ ++ /* check to see if NPROC which is per profile and handled ++ * in clone/exec or whether this is a limit to be set ++ * can't set cpu limit either right now ++ */ ++ if (i == RLIMIT_NPROC || i == RLIMIT_CPU) ++ continue; ++ ++ old_rlim = task->signal->rlim + i; ++ new_rlim = *old_rlim; ++ ++ if (mask & profile->rlimits.mask && ++ profile->rlimits.limits[i].rlim_max < new_rlim.rlim_max) { ++ new_rlim.rlim_max = profile->rlimits.limits[i].rlim_max; ++ /* soft limit should not exceed hard limit */ ++ if (new_rlim.rlim_cur > new_rlim.rlim_max) ++ new_rlim.rlim_cur = new_rlim.rlim_max; ++ } ++ ++ *old_rlim = new_rlim; ++ } ++ task_unlock(task->group_leader); ++} + + /******************************* + * Global task related functions +@@ -885,6 +961,7 @@ int aa_revalidate_sk(struct sock *sk, ch + */ + int aa_clone(struct task_struct *child) + { ++ struct aa_audit sa; + struct aa_task_context *cxt, *child_cxt; + struct aa_profile *profile; 
+ +@@ -894,6 +971,11 @@ int aa_clone(struct task_struct *child) + if (!child_cxt) + return -ENOMEM; + ++ memset(&sa, 0, sizeof(sa)); ++ sa.operation = "clone"; ++ sa.task = child->pid; ++ sa.gfp_mask = GFP_KERNEL; ++ + repeat: + profile = aa_get_profile(current); + if (profile) { +@@ -910,18 +992,22 @@ repeat: + goto repeat; + } + ++ if (aa_rlimit_nproc(profile)) { ++ sa.info = "rlimit nproc limit exceeded"; ++ unlock_profile(profile); ++ aa_audit_reject(profile, &sa); ++ aa_put_profile(profile); ++ return -EAGAIN; ++ } ++ + /* No need to grab the child's task lock here. */ + aa_change_task_context(child, child_cxt, profile, + cxt->cookie, cxt->previous_profile); ++ + unlock_profile(profile); + + if (APPARMOR_COMPLAIN(child_cxt) && + profile == profile->ns->null_complain_profile) { +- struct aa_audit sa; +- memset(&sa, 0, sizeof(sa)); +- sa.operation = "clone"; +- sa.gfp_mask = GFP_KERNEL; +- sa.task = child->pid; + aa_audit_hint(profile, &sa); + } + aa_put_profile(profile); +@@ -1156,6 +1242,10 @@ repeat: + sa.task = current->parent->pid; + aa_audit_reject(profile, &sa); + } ++ if (PTR_ERR(old_profile) == -EAGAIN) { ++ sa.info = "rlimit nproc limit exceeded"; ++ aa_audit_reject(profile, &sa); ++ } + new_profile = old_profile; + goto cleanup; + } +@@ -1303,6 +1393,12 @@ static int do_change_profile(struct aa_p + goto out; + } + ++ if ((error = aa_rlimit_nproc(new_profile))) { ++ sa->info = "rlimit nproc limit exceeded"; ++ aa_audit_reject(cxt->profile, sa); ++ goto out; ++ } ++ + if (new_profile == ns->null_complain_profile) + aa_audit_hint(cxt->profile, sa); + +@@ -1481,17 +1577,18 @@ struct aa_profile *__aa_replace_profile( + + cxt = lock_task_and_profiles(task, profile); + if (unlikely(profile && profile->isstale)) { +- task_unlock(task); +- unlock_both_profiles(profile, cxt ? 
cxt->profile : NULL); +- aa_free_task_context(new_cxt); +- return ERR_PTR(-ESTALE); ++ old_profile = ERR_PTR(-ESTALE); ++ goto error; + } + + if ((current->ptrace & PT_PTRACED) && aa_may_ptrace(cxt, profile)) { +- task_unlock(task); +- unlock_both_profiles(profile, cxt ? cxt->profile : NULL); +- aa_free_task_context(new_cxt); +- return ERR_PTR(-EPERM); ++ old_profile = ERR_PTR(-EPERM); ++ goto error; ++ } ++ ++ if (aa_rlimit_nproc(profile)) { ++ old_profile = ERR_PTR(-EAGAIN); ++ goto error; + } + + if (cxt) +@@ -1499,8 +1596,15 @@ struct aa_profile *__aa_replace_profile( + aa_change_task_context(task, new_cxt, profile, 0, NULL); + + task_unlock(task); ++ aa_set_rlimits(task, profile); + unlock_both_profiles(profile, old_profile); + return old_profile; ++ ++error: ++ task_unlock(task); ++ unlock_both_profiles(profile, cxt ? cxt->profile : NULL); ++ aa_free_task_context(new_cxt); ++ return old_profile; + } + + /** +@@ -1565,6 +1669,7 @@ void aa_change_task_context(struct task_ + + if (old_cxt) { + list_del_init(&old_cxt->list); ++ old_cxt->profile->task_count--; + call_rcu(&old_cxt->rcu, free_aa_task_context_rcu_callback); + } + if (new_cxt) { +@@ -1576,6 +1681,7 @@ void aa_change_task_context(struct task_ + new_cxt->cookie = cookie; + new_cxt->task = task; + new_cxt->profile = aa_dup_profile(profile); ++ profile->task_count++; + new_cxt->previous_profile = aa_dup_profile(previous_profile); + list_move(&new_cxt->list, &profile->task_contexts); + } +--- a/security/apparmor/module_interface.c ++++ b/security/apparmor/module_interface.c +@@ -177,6 +177,22 @@ fail: + return 0; + } + ++static int aa_is_u64(struct aa_ext *e, u64 *data, const char *name) ++{ ++ void *pos = e->pos; ++ if (aa_is_nameX(e, AA_U64, name)) { ++ if (!aa_inbounds(e, sizeof(u64))) ++ goto fail; ++ if (data) ++ *data = le64_to_cpu(get_unaligned((u64 *)e->pos)); ++ e->pos += sizeof(u64); ++ return 1; ++ } ++fail: ++ e->pos = pos; ++ return 0; ++} ++ + static size_t aa_is_array(struct aa_ext *e, const 
char *name) + { + void *pos = e->pos; +@@ -312,6 +328,39 @@ fail: + return 0; + } + ++int aa_unpack_rlimits(struct aa_ext *e, struct aa_profile *profile) ++{ ++ void *pos = e->pos; ++ ++ /* rlimits are optional */ ++ if (aa_is_nameX(e, AA_STRUCT, "rlimits")) { ++ int i, size; ++ u32 tmp = 0; ++ if (!aa_is_u32(e, &tmp, NULL)) ++ goto fail; ++ profile->rlimits.mask = tmp; ++ ++ size = aa_is_array(e, NULL); ++ if (size > RLIM_NLIMITS) ++ goto fail; ++ for (i = 0; i < size; i++) { ++ u64 tmp = 0; ++ if (!aa_is_u64(e, &tmp, NULL)) ++ goto fail; ++ profile->rlimits.limits[i].rlim_max = tmp; ++ } ++ if (!aa_is_nameX(e, AA_ARRAYEND, NULL)) ++ goto fail; ++ if (!aa_is_nameX(e, AA_STRUCTEND, NULL)) ++ goto fail; ++ } ++ return 1; ++ ++fail: ++ e->pos = pos; ++ return 0; ++} ++ + /** + * aa_unpack_profile - unpack a serialized profile + * @e: serialized data extent information +@@ -355,6 +404,9 @@ static struct aa_profile *aa_unpack_prof + if (!aa_is_u32(e, &(profile->set_caps), NULL)) + goto fail; + ++ if (!aa_unpack_rlimits(e, profile)) ++ goto fail; ++ + size = aa_is_array(e, "net_allowed_af"); + if (size) { + if (size > AF_MAX) +@@ -614,6 +666,8 @@ ssize_t aa_replace_profile(void *udata, + sa.operation = "profile_load"; + goto out; + } ++ /* do not fail replacement based off of profile's NPROC rlimit */ ++ + /* + * Replacement needs to allocate a new aa_task_context for each + * task confined by old_profile. To do this the profile locks +@@ -634,6 +688,7 @@ ssize_t aa_replace_profile(void *udata, + task_lock(task); + task_replace(task, new_cxt, new_profile); + task_unlock(task); ++ aa_set_rlimits(task, new_profile); + new_cxt = NULL; + } + unlock_both_profiles(old_profile, new_profile); +@@ -656,6 +711,7 @@ out: + * + * remove a profile from the profile list and all aa_task_context references + * to said profile. 
++ * NOTE: removing confinement does not restore rlimits to preconfinemnet values + */ + ssize_t aa_remove_profile(char *name, size_t size) + { diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/d_namespace_path.diff b/src/patches/suse-2.6.27.25/patches.apparmor/d_namespace_path.diff new file mode 100644 index 0000000000..d5b2dc7075 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/d_namespace_path.diff @@ -0,0 +1,60 @@ +From: Andreas Gruenbacher +Subject: Add d_namespace_path() to compute namespace relative pathnames + +In AppArmor, we are interested in pathnames relative to the namespace root. +This is the same as d_path() except for the root where the search ends. Add +a function for computing the namespace-relative path. + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/namespace.c | 30 ++++++++++++++++++++++++++++++ + include/linux/mount.h | 2 ++ + 2 files changed, 32 insertions(+) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -2299,3 +2299,33 @@ void __put_mnt_ns(struct mnt_namespace * + release_mounts(&umount_list); + kfree(ns); + } ++ ++char *d_namespace_path(struct dentry *dentry, struct vfsmount *vfsmnt, ++ char *buf, int buflen) ++{ ++ struct path root, tmp, ns_root = { }; ++ struct path path = { .mnt = vfsmnt, .dentry = dentry }; ++ char *res; ++ ++ read_lock(¤t->fs->lock); ++ root = current->fs->root; ++ path_get(¤t->fs->root); ++ read_unlock(¤t->fs->lock); ++ spin_lock(&vfsmount_lock); ++ if (root.mnt) ++ ns_root.mnt = mntget(root.mnt->mnt_ns->root); ++ if (ns_root.mnt) ++ ns_root.dentry = dget(ns_root.mnt->mnt_root); ++ spin_unlock(&vfsmount_lock); ++ tmp = ns_root; ++ res = __d_path(&path, &tmp, buf, buflen, ++ D_PATH_FAIL_DELETED | D_PATH_DISCONNECT); ++ path_put(&root); ++ path_put(&ns_root); ++ ++ /* Prevent empty path for lazily unmounted filesystems. 
*/ ++ if (!IS_ERR(res) && *res == '\0') ++ *--res = '.'; ++ return res; ++} ++EXPORT_SYMBOL(d_namespace_path); +--- a/include/linux/mount.h ++++ b/include/linux/mount.h +@@ -134,4 +134,6 @@ extern void mark_mounts_for_expiry(struc + extern spinlock_t vfsmount_lock; + extern dev_t name_to_dev_t(char *name); + ++extern char *d_namespace_path(struct dentry *, struct vfsmount *, char *, int); ++ + #endif /* _LINUX_MOUNT_H */ diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/d_namespace_path_oops_fix.diff b/src/patches/suse-2.6.27.25/patches.apparmor/d_namespace_path_oops_fix.diff new file mode 100644 index 0000000000..ea3697c2a1 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/d_namespace_path_oops_fix.diff @@ -0,0 +1,25 @@ +From: Miklos Szeredi +Subject: fix oops in d_namespace_path +Patch-mainline: no +References: bnc#433504 + +d_namespace_path uses the current->fs->root to get the current +namespace. If root is detached root.mnt->mnt_ns will be NULL, causing +an Oops. Fix by checking this before dereferencing the mnt_ns. 
+ +Signed-off-by: Miklos Szeredi +--- + fs/namespace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -2312,7 +2312,7 @@ char *d_namespace_path(struct dentry *de + path_get(¤t->fs->root); + read_unlock(¤t->fs->lock); + spin_lock(&vfsmount_lock); +- if (root.mnt) ++ if (root.mnt && root.mnt->mnt_ns) + ns_root.mnt = mntget(root.mnt->mnt_ns->root); + if (ns_root.mnt) + ns_root.dentry = dget(ns_root.mnt->mnt_root); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/do_path_lookup-nameidata.diff b/src/patches/suse-2.6.27.25/patches.apparmor/do_path_lookup-nameidata.diff new file mode 100644 index 0000000000..44a21b94e1 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/do_path_lookup-nameidata.diff @@ -0,0 +1,42 @@ +From: Andreas Gruenbacher +Subject: Switch to vfs_permission() in do_path_lookup() + +Switch from file_permission() to vfs_permission() in do_path_lookup(): +this avoids calling permission() with a NULL nameidata here. 
+ +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/namei.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -1085,24 +1085,21 @@ static int do_path_lookup(int dfd, const + path_get(&fs->pwd); + read_unlock(&fs->lock); + } else { +- struct dentry *dentry; +- + file = fget_light(dfd, &fput_needed); + retval = -EBADF; + if (!file) + goto out_fail; + +- dentry = file->f_path.dentry; ++ nd->path = file->f_path; + + retval = -ENOTDIR; +- if (!S_ISDIR(dentry->d_inode->i_mode)) ++ if (!S_ISDIR(nd->path.dentry->d_inode->i_mode)) + goto fput_fail; + + retval = file_permission(file, MAY_EXEC); + if (retval) + goto fput_fail; + +- nd->path = file->f_path; + path_get(&file->f_path); + + fput_light(file, fput_needed); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/file-handle-ops.diff b/src/patches/suse-2.6.27.25/patches.apparmor/file-handle-ops.diff new file mode 100644 index 0000000000..2e921f9467 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/file-handle-ops.diff @@ -0,0 +1,84 @@ +From: Andreas Gruenbacher +Subject: Enable LSM hooks to distinguish operations on file descriptors from operations on pathnames + +Struct iattr already contains ia_file since commit cc4e69de from +Miklos (which is related to commit befc649c). Use this to pass +struct file down the setattr hooks. This allows LSMs to distinguish +operations on file descriptors from operations on paths. 
+ +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen +Cc: Miklos Szeredi + +--- + fs/nfsd/vfs.c | 12 +++++++----- + fs/open.c | 5 ++++- + 2 files changed, 11 insertions(+), 6 deletions(-) + +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -425,7 +425,7 @@ static ssize_t nfsd_getxattr(struct dent + { + ssize_t buflen; + +- buflen = vfs_getxattr(dentry, mnt, key, NULL, 0); ++ buflen = vfs_getxattr(dentry, mnt, key, NULL, 0, NULL); + if (buflen <= 0) + return buflen; + +@@ -433,7 +433,7 @@ static ssize_t nfsd_getxattr(struct dent + if (!*buf) + return -ENOMEM; + +- return vfs_getxattr(dentry, mnt, key, *buf, buflen); ++ return vfs_getxattr(dentry, mnt, key, *buf, buflen, NULL); + } + #endif + +@@ -459,7 +459,7 @@ set_nfsv4_acl_one(struct dentry *dentry, + goto out; + } + +- error = vfs_setxattr(dentry, mnt, key, buf, len, 0); ++ error = vfs_setxattr(dentry, mnt, key, buf, len, 0, NULL); + out: + kfree(buf); + return error; +@@ -2133,12 +2133,14 @@ nfsd_set_posix_acl(struct svc_fh *fhp, i + if (error) + goto getout; + if (size) +- error = vfs_setxattr(fhp->fh_dentry, mnt, name, value, size,0); ++ error = vfs_setxattr(fhp->fh_dentry, mnt, name, value, size, 0, ++ NULL); + else { + if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) + error = 0; + else { +- error = vfs_removexattr(fhp->fh_dentry, mnt, name); ++ error = vfs_removexattr(fhp->fh_dentry, mnt, name, ++ NULL); + if (error == -ENODATA) + error = 0; + } +--- a/fs/open.c ++++ b/fs/open.c +@@ -623,7 +623,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd + if (mode == (mode_t) -1) + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); +- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; ++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME | ATTR_FILE; + err = fnotify_change(dentry, file->f_path.mnt, &newattrs, file); + mutex_unlock(&inode->i_mutex); + mnt_drop_write(file->f_path.mnt); +@@ -686,6 +686,9 @@ static int chown_common(struct dentry * + if (!S_ISDIR(inode->i_mode)) 
+ newattrs.ia_valid |= + ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; ++ if (file) ++ newattrs.ia_valid |= ATTR_FILE; ++ + mutex_lock(&inode->i_mutex); + error = fnotify_change(dentry, mnt, &newattrs, file); + mutex_unlock(&inode->i_mutex); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/fix-complain.diff b/src/patches/suse-2.6.27.25/patches.apparmor/fix-complain.diff new file mode 100644 index 0000000000..85feac1018 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/fix-complain.diff @@ -0,0 +1,26 @@ +From: John Johansen +Subject: fix enforcement of deny rules in complain mode +Patch-mainline: no +References: bnc#426159 + +Fix enforcement of deny rules so that they are not enforced in complain +mode. This is necessary so that application behavior is not changed by +the presence of the deny rule. + +Signed-off-by: John Johansen + +--- + security/apparmor/main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/security/apparmor/main.c ++++ b/security/apparmor/main.c +@@ -325,7 +325,7 @@ static int aa_audit_file(struct aa_profi + } else { + int mask = AUDIT_QUIET_MASK(sa->audit_mask); + +- if (!(sa->denied_mask & ~mask)) ++ if (!(sa->denied_mask & ~mask) && !PROFILE_COMPLAIN(profile)) + return sa->error_code; + + /* mask off perms whose denial is being silenced */ diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/fix-deleted-leak.diff b/src/patches/suse-2.6.27.25/patches.apparmor/fix-deleted-leak.diff new file mode 100644 index 0000000000..f70fedc451 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/fix-deleted-leak.diff @@ -0,0 +1,25 @@ +From: John Johansen +Subject: [PATCH] AppArmor: Fix leak of filename for deleted files + + This patch fixes a memory leak where the name doesn't get freed when + a file has been deleted. 
+ +Signed-off-by: Jeff Mahoney +--- + security/apparmor/main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/security/apparmor/main.c ++++ b/security/apparmor/main.c +@@ -500,10 +500,10 @@ static char *aa_get_name(struct dentry * + *buffer = buf; + return name; + } ++ kfree(buf); + if (PTR_ERR(name) != -ENAMETOOLONG) + return name; + +- kfree(buf); + size <<= 1; + if (size > apparmor_path_max) + return ERR_PTR(-ENAMETOOLONG); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/fix-security-param.diff b/src/patches/suse-2.6.27.25/patches.apparmor/fix-security-param.diff new file mode 100644 index 0000000000..8d03cbceec --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/fix-security-param.diff @@ -0,0 +1,66 @@ +From: John Johansen +Subject: fix recognition of security= boot parameter +Patch-mainline: no +References: bnc#442668 + +Fix AppArmor to respect the kernel boot parameter security=, so that if a +different lsm is choosen apparmor does not try to register its lsm hooks. + +Signed-off-by: John Johansen + +--- + security/Kconfig | 9 +++++++++ + security/apparmor/lsm.c | 5 +++-- + security/security.c | 2 +- + 3 files changed, 13 insertions(+), 3 deletions(-) + +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -51,6 +51,15 @@ config SECURITY + + If you are unsure how to answer this question, answer N. + ++config SECURITY_DEFAULT ++ string "Default security module" ++ depends on SECURITY ++ default "" ++ help ++ This determines the security module used if the security= ++ boot parmater is not provided. If a security module is not ++ specified the first module to register will be used. 
++ + config SECURITY_NETWORK + bool "Socket and Networking Security Hooks" + depends on SECURITY +--- a/security/apparmor/lsm.c ++++ b/security/apparmor/lsm.c +@@ -911,6 +911,7 @@ static int apparmor_task_setrlimit(unsig + } + + struct security_operations apparmor_ops = { ++ .name = "apparmor", + .ptrace_may_access = apparmor_ptrace_may_access, + .ptrace_traceme = apparmor_ptrace_traceme, + .capget = cap_capget, +@@ -989,8 +990,8 @@ static int __init apparmor_init(void) + { + int error; + +- if (!apparmor_enabled) { +- info_message("AppArmor disabled by boottime parameter\n"); ++ if (!apparmor_enabled || !security_module_enable(&apparmor_ops)) { ++ info_message("AppArmor disabled by boot time parameter\n"); + return 0; + } + +--- a/security/security.c ++++ b/security/security.c +@@ -18,7 +18,7 @@ + #include + + /* Boot-time LSM user choice */ +-static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1]; ++static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = CONFIG_SECURITY_DEFAULT; + + /* things that live in capability.c */ + extern struct security_operations default_security_ops; diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/fix-vfs_rmdir.diff b/src/patches/suse-2.6.27.25/patches.apparmor/fix-vfs_rmdir.diff new file mode 100644 index 0000000000..141bc64479 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/fix-vfs_rmdir.diff @@ -0,0 +1,44 @@ +From: John Johansen +Subject: Call lsm hook before unhashing dentry in vfs_rmdir() + +If we unhash the dentry before calling the security_inode_rmdir hook, +we cannot compute the file's pathname in the hook anymore. AppArmor +needs to know the filename in order to decide whether a file may be +deleted, though. 
+ +Signed-off-by: John Johansen +Signed-off-by: Andreas Gruenbacher + +--- + fs/namei.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2177,6 +2177,10 @@ int vfs_rmdir(struct inode *dir, struct + if (!dir->i_op || !dir->i_op->rmdir) + return -EPERM; + ++ error = security_inode_rmdir(dir, dentry, mnt); ++ if (error) ++ return error; ++ + DQUOT_INIT(dir); + + mutex_lock(&dentry->d_inode->i_mutex); +@@ -2184,12 +2188,9 @@ int vfs_rmdir(struct inode *dir, struct + if (d_mountpoint(dentry)) + error = -EBUSY; + else { +- error = security_inode_rmdir(dir, dentry, mnt); +- if (!error) { +- error = dir->i_op->rmdir(dir, dentry); +- if (!error) +- dentry->d_inode->i_flags |= S_DEAD; +- } ++ error = dir->i_op->rmdir(dir, dentry); ++ if (!error) ++ dentry->d_inode->i_flags |= S_DEAD; + } + mutex_unlock(&dentry->d_inode->i_mutex); + if (!error) { diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/fork-tracking.diff b/src/patches/suse-2.6.27.25/patches.apparmor/fork-tracking.diff new file mode 100644 index 0000000000..49cbd88ecb --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/fork-tracking.diff @@ -0,0 +1,108 @@ +From: John Johansen +Subject: fix log messages to enable tools profile learning +Patch-mainline: no +References: bnc#447564 + +The allocation of the child pid is done after the LSM clone hook, which +breaks the AppArmor tools fork tracking, for profiles learning. Output +the parent pid with each log message to enable the tools to handle fork +tracking. 
+ +Signed-off-by: John Johansen + +--- + security/apparmor/lsm.c | 28 ---------------------------- + security/apparmor/main.c | 10 +++++----- + security/apparmor/module_interface.c | 2 +- + 3 files changed, 6 insertions(+), 34 deletions(-) + +--- a/security/apparmor/lsm.c ++++ b/security/apparmor/lsm.c +@@ -143,20 +143,6 @@ static int param_set_aa_enabled(const ch + return 0; + } + +-static int aa_reject_syscall(struct task_struct *task, gfp_t flags, +- const char *name) +-{ +- struct aa_profile *profile = aa_get_profile(task); +- int error = 0; +- +- if (profile) { +- error = aa_audit_syscallreject(profile, flags, name); +- aa_put_profile(profile); +- } +- +- return error; +-} +- + static int apparmor_ptrace(struct task_struct *parent, + struct task_struct *child) + { +@@ -292,17 +278,6 @@ static int apparmor_bprm_secureexec(stru + return ret; + } + +-static int apparmor_sb_mount(char *dev_name, struct path *path, char *type, +- unsigned long flags, void *data) +-{ +- return aa_reject_syscall(current, GFP_KERNEL, "mount"); +-} +- +-static int apparmor_umount(struct vfsmount *mnt, int flags) +-{ +- return aa_reject_syscall(current, GFP_KERNEL, "umount"); +-} +- + static int apparmor_inode_mkdir(struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, int mask) + { +@@ -925,9 +900,6 @@ struct security_operations apparmor_ops + .bprm_set_security = apparmor_bprm_set_security, + .bprm_secureexec = apparmor_bprm_secureexec, + +- .sb_mount = apparmor_sb_mount, +- .sb_umount = apparmor_umount, +- + .inode_mkdir = apparmor_inode_mkdir, + .inode_rmdir = apparmor_inode_rmdir, + .inode_create = apparmor_inode_create, +--- a/security/apparmor/main.c ++++ b/security/apparmor/main.c +@@ -229,9 +229,13 @@ static int aa_audit_base(struct aa_profi + audit_log_format(ab, " protocol=%d", sa->protocol); + } + +- audit_log_format(ab, " pid=%d", current->pid); ++ audit_log_format(ab, " pid=%d", current->pid); + + if (profile) { ++ if (!sa->parent) ++ audit_log_format(ab, " 
parent=%d", ++ current->real_parent->pid); ++ + audit_log_format(ab, " profile="); + audit_log_untrustedstring(ab, profile->name); + +@@ -1006,10 +1010,6 @@ repeat: + + unlock_profile(profile); + +- if (APPARMOR_COMPLAIN(child_cxt) && +- profile == profile->ns->null_complain_profile) { +- aa_audit_hint(profile, &sa); +- } + aa_put_profile(profile); + } else + aa_free_task_context(child_cxt); +--- a/security/apparmor/module_interface.c ++++ b/security/apparmor/module_interface.c +@@ -126,7 +126,7 @@ static int aa_is_nameX(struct aa_ext *e, + * AA_NAME tag value is a u16. + */ + if (aa_is_X(e, AA_NAME)) { +- char *tag; ++ char *tag = NULL; + size_t size = aa_is_u16_chunk(e, &tag); + /* if a name is specified it must match. otherwise skip tag */ + if (name && (!size || strcmp(name, tag))) diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/fsetattr-reintro-ATTR_FILE.diff b/src/patches/suse-2.6.27.25/patches.apparmor/fsetattr-reintro-ATTR_FILE.diff new file mode 100644 index 0000000000..a5efdeee74 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/fsetattr-reintro-ATTR_FILE.diff @@ -0,0 +1,28 @@ +From: John Johansen +Subject: AppArmor: reintroduce ATTR_FILE + +The fsetattr patch removed ATTR_FILE but AppArmor needs it to distinguish +file based writes. + +Note: Now that LSMs must be static, it would be better to add a file +pointer argument to security_operations->inode_setattr() instead. Then +move the fs.h chunk to patches.apparmor/fsetattr-restore-ia_file. 
-jeffm + +Signed-off-by: John Johansen + +--- + fs/open.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/open.c ++++ b/fs/open.c +@@ -208,6 +208,9 @@ int do_truncate(struct dentry *dentry, s + newattrs.ia_size = length; + newattrs.ia_valid = ATTR_SIZE | time_attrs; + ++ if (filp) ++ newattrs.ia_valid |= ATTR_FILE; ++ + /* Remove suid/sgid on truncate too */ + newattrs.ia_valid |= should_remove_suid(dentry); + diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/fsetattr-restore-ia_file.diff b/src/patches/suse-2.6.27.25/patches.apparmor/fsetattr-restore-ia_file.diff new file mode 100644 index 0000000000..5e5bd121fa --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/fsetattr-restore-ia_file.diff @@ -0,0 +1,58 @@ +From: Jeff Mahoney +Subject: [PATCH] vfs: restore ia_file for compatibility with external modules +References: bnc#381259 + + patches.apparmor/fsetattr.diff eliminated ia_file and ATTR_FILE in favor + of providing a ->fsetattr call that used a file pointer. Until this + patch is accepted into mainline, this patch provides the backward + compatibility for external file system modules. + +Signed-off-by: Jeff Mahoney +--- + fs/attr.c | 13 ++++++++++++- + include/linux/fs.h | 11 +++++++++++ + 2 files changed, 23 insertions(+), 1 deletion(-) + +--- a/fs/attr.c ++++ b/fs/attr.c +@@ -168,8 +168,19 @@ int fnotify_change(struct dentry *dentry + if (!error) { + if (file && file->f_op && file->f_op->fsetattr) + error = file->f_op->fsetattr(file, attr); +- else ++ else { ++ /* External file system still expect to be ++ * passed a file pointer via ia_file and ++ * have it announced via ATTR_FILE. This ++ * just makes it so they don't need to ++ * change their API just for us. External ++ * callers will have set these themselves. 
*/ ++ if (file) { ++ attr->ia_valid |= ATTR_FILE; ++ attr->ia_file = file; ++ } + error = inode->i_op->setattr(dentry, attr); ++ } + } + } else { + error = inode_change_ok(inode, attr); +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -367,6 +367,17 @@ struct iattr { + struct timespec ia_atime; + struct timespec ia_mtime; + struct timespec ia_ctime; ++ ++ /* ++ * Not an attribute, but an auxilary info for filesystems wanting to ++ * implement an ftruncate() like method. NOTE: filesystem should ++ * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). ++ * ++ * NOTE: With patches.apparmor/fsetattr.diff applied, this is ++ * for compatibility with external file system modules only. There ++ * should not be any in-kernel users left. ++ */ ++ struct file *ia_file; + }; + + /* diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/fsetattr.diff b/src/patches/suse-2.6.27.25/patches.apparmor/fsetattr.diff new file mode 100644 index 0000000000..ecd58fa4b1 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/fsetattr.diff @@ -0,0 +1,414 @@ +Subject: VFS: new fsetattr() file operation + +From: Miklos Szeredi + +Add a new file operation: f_op->fsetattr(), that is invoked by +ftruncate, fchmod, fchown and utimensat. Fall back to i_op->setattr() +if it is not defined. + +For the reasons why we need this, see patch adding fgetattr(). + +ftruncate() already passed the open file to the filesystem via the +ia_file member of struct iattr. However it is cleaner to have a +separate file operation for this, so remove ia_file, ATTR_FILE and +convert existing users: fuse and AFS. 
+ +Signed-off-by: Miklos Szeredi --- +Signed-off-by: John Johansen --- + +--- + fs/afs/dir.c | 1 + + fs/afs/file.c | 1 + + fs/afs/inode.c | 19 +++++++++++++++---- + fs/afs/internal.h | 1 + + fs/attr.c | 19 +++++++++++++++---- + fs/fuse/dir.c | 20 +++++++++----------- + fs/fuse/file.c | 7 +++++++ + fs/fuse/fuse_i.h | 4 ++++ + fs/open.c | 20 ++++++++------------ + fs/utimes.c | 9 +++++---- + include/linux/fs.h | 9 ++------- + 11 files changed, 68 insertions(+), 42 deletions(-) + +--- a/fs/afs/dir.c ++++ b/fs/afs/dir.c +@@ -45,6 +45,7 @@ const struct file_operations afs_dir_fil + .release = afs_release, + .readdir = afs_readdir, + .lock = afs_lock, ++ .fsetattr = afs_fsetattr, + }; + + const struct inode_operations afs_dir_inode_operations = { +--- a/fs/afs/file.c ++++ b/fs/afs/file.c +@@ -36,6 +36,7 @@ const struct file_operations afs_file_op + .fsync = afs_fsync, + .lock = afs_lock, + .flock = afs_flock, ++ .fsetattr = afs_fsetattr, + }; + + const struct inode_operations afs_file_inode_operations = { +--- a/fs/afs/inode.c ++++ b/fs/afs/inode.c +@@ -358,7 +358,8 @@ void afs_clear_inode(struct inode *inode + /* + * set the attributes of an inode + */ +-int afs_setattr(struct dentry *dentry, struct iattr *attr) ++static int afs_do_setattr(struct dentry *dentry, struct iattr *attr, ++ struct file *file) + { + struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); + struct key *key; +@@ -380,8 +381,8 @@ int afs_setattr(struct dentry *dentry, s + afs_writeback_all(vnode); + } + +- if (attr->ia_valid & ATTR_FILE) { +- key = attr->ia_file->private_data; ++ if (file) { ++ key = file->private_data; + } else { + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) { +@@ -391,10 +392,20 @@ int afs_setattr(struct dentry *dentry, s + } + + ret = afs_vnode_setattr(vnode, key, attr); +- if (!(attr->ia_valid & ATTR_FILE)) ++ if (!file) + key_put(key); + + error: + _leave(" = %d", ret); + return ret; + } ++ ++int afs_setattr(struct dentry *dentry, struct iattr *attr) ++{ ++ 
return afs_do_setattr(dentry, attr, NULL); ++} ++ ++int afs_fsetattr(struct file *file, struct iattr *attr) ++{ ++ return afs_do_setattr(file->f_path.dentry, attr, file); ++} +--- a/fs/afs/internal.h ++++ b/fs/afs/internal.h +@@ -548,6 +548,7 @@ extern void afs_zap_data(struct afs_vnod + extern int afs_validate(struct afs_vnode *, struct key *); + extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); + extern int afs_setattr(struct dentry *, struct iattr *); ++extern int afs_fsetattr(struct file *, struct iattr *); + extern void afs_clear_inode(struct inode *); + + /* +--- a/fs/attr.c ++++ b/fs/attr.c +@@ -100,8 +100,8 @@ int inode_setattr(struct inode * inode, + } + EXPORT_SYMBOL(inode_setattr); + +-int notify_change(struct dentry *dentry, struct vfsmount *mnt, +- struct iattr *attr) ++int fnotify_change(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *attr, struct file *file) + { + struct inode *inode = dentry->d_inode; + mode_t mode = inode->i_mode; +@@ -165,8 +165,12 @@ int notify_change(struct dentry *dentry, + + if (inode->i_op && inode->i_op->setattr) { + error = security_inode_setattr(dentry, mnt, attr); +- if (!error) +- error = inode->i_op->setattr(dentry, attr); ++ if (!error) { ++ if (file && file->f_op && file->f_op->fsetattr) ++ error = file->f_op->fsetattr(file, attr); ++ else ++ error = inode->i_op->setattr(dentry, attr); ++ } + } else { + error = inode_change_ok(inode, attr); + if (!error) +@@ -188,5 +192,12 @@ int notify_change(struct dentry *dentry, + + return error; + } ++EXPORT_SYMBOL_GPL(fnotify_change); ++ ++int notify_change(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *attr) ++{ ++ return fnotify_change(dentry, mnt, attr, NULL); ++} + + EXPORT_SYMBOL(notify_change); +--- a/fs/fuse/dir.c ++++ b/fs/fuse/dir.c +@@ -1105,21 +1105,22 @@ static int fuse_dir_fsync(struct file *f + return file ? 
fuse_fsync_common(file, de, datasync, 1) : 0; + } + +-static bool update_mtime(unsigned ivalid) ++static bool update_mtime(unsigned ivalid, bool have_file) + { + /* Always update if mtime is explicitly set */ + if (ivalid & ATTR_MTIME_SET) + return true; + + /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ +- if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) ++ if ((ivalid & ATTR_SIZE) && ((ivalid & ATTR_OPEN) || have_file)) + return false; + + /* In all other cases update */ + return true; + } + +-static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) ++static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg, ++ bool have_file) + { + unsigned ivalid = iattr->ia_valid; + +@@ -1138,7 +1139,7 @@ static void iattr_to_fattr(struct iattr + if (!(ivalid & ATTR_ATIME_SET)) + arg->valid |= FATTR_ATIME_NOW; + } +- if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) { ++ if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, have_file)) { + arg->valid |= FATTR_MTIME; + arg->mtime = iattr->ia_mtime.tv_sec; + arg->mtimensec = iattr->ia_mtime.tv_nsec; +@@ -1199,8 +1200,8 @@ void fuse_release_nowrite(struct inode * + * vmtruncate() doesn't allow for this case, so do the rlimit checking + * and the actual truncation by hand. 
+ */ +-static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, +- struct file *file) ++int fuse_do_setattr(struct dentry *entry, struct iattr *attr, ++ struct file *file) + { + struct inode *inode = entry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); +@@ -1244,7 +1245,7 @@ static int fuse_do_setattr(struct dentry + + memset(&inarg, 0, sizeof(inarg)); + memset(&outarg, 0, sizeof(outarg)); +- iattr_to_fattr(attr, &inarg); ++ iattr_to_fattr(attr, &inarg, file != NULL); + if (file) { + struct fuse_file *ff = file->private_data; + inarg.valid |= FATTR_FH; +@@ -1314,10 +1315,7 @@ error: + + static int fuse_setattr(struct dentry *entry, struct iattr *attr) + { +- if (attr->ia_valid & ATTR_FILE) +- return fuse_do_setattr(entry, attr, attr->ia_file); +- else +- return fuse_do_setattr(entry, attr, NULL); ++ return fuse_do_setattr(entry, attr, NULL); + } + + static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -1466,6 +1466,11 @@ static loff_t fuse_file_llseek(struct fi + return retval; + } + ++static int fuse_fsetattr(struct file *file, struct iattr *attr) ++{ ++ return fuse_do_setattr(file->f_path.dentry, attr, file); ++} ++ + static const struct file_operations fuse_file_operations = { + .llseek = fuse_file_llseek, + .read = do_sync_read, +@@ -1479,6 +1484,7 @@ static const struct file_operations fuse + .fsync = fuse_fsync, + .lock = fuse_file_lock, + .flock = fuse_file_flock, ++ .fsetattr = fuse_fsetattr, + .splice_read = generic_file_splice_read, + }; + +@@ -1492,6 +1498,7 @@ static const struct file_operations fuse + .fsync = fuse_fsync, + .lock = fuse_file_lock, + .flock = fuse_file_flock, ++ .fsetattr = fuse_fsetattr, + /* no mmap and splice_read */ + }; + +--- a/fs/fuse/fuse_i.h ++++ b/fs/fuse/fuse_i.h +@@ -551,6 +551,10 @@ void fuse_truncate(struct address_space + */ + int fuse_dev_init(void); + ++ ++int fuse_do_setattr(struct dentry *entry, struct iattr *attr, ++ struct file 
*file); ++ + /** + * Cleanup the client device + */ +--- a/fs/open.c ++++ b/fs/open.c +@@ -207,16 +207,12 @@ int do_truncate(struct dentry *dentry, s + + newattrs.ia_size = length; + newattrs.ia_valid = ATTR_SIZE | time_attrs; +- if (filp) { +- newattrs.ia_file = filp; +- newattrs.ia_valid |= ATTR_FILE; +- } + + /* Remove suid/sgid on truncate too */ + newattrs.ia_valid |= should_remove_suid(dentry); + + mutex_lock(&dentry->d_inode->i_mutex); +- err = notify_change(dentry, mnt, &newattrs); ++ err = fnotify_change(dentry, mnt, &newattrs, filp); + mutex_unlock(&dentry->d_inode->i_mutex); + return err; + } +@@ -625,7 +621,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- err = notify_change(dentry, file->f_path.mnt, &newattrs); ++ err = fnotify_change(dentry, file->f_path.mnt, &newattrs, file); + mutex_unlock(&inode->i_mutex); + mnt_drop_write(file->f_path.mnt); + out_putf: +@@ -669,7 +665,7 @@ SYSCALL_DEFINE2(chmod, const char __user + } + + static int chown_common(struct dentry * dentry, struct vfsmount *mnt, +- uid_t user, gid_t group) ++ uid_t user, gid_t group, struct file *file) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -688,7 +684,7 @@ static int chown_common(struct dentry * + newattrs.ia_valid |= + ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; + mutex_lock(&inode->i_mutex); +- error = notify_change(dentry, mnt, &newattrs); ++ error = fnotify_change(dentry, mnt, &newattrs, file); + mutex_unlock(&inode->i_mutex); + + return error; +@@ -705,7 +701,7 @@ SYSCALL_DEFINE3(chown, const char __user + error = mnt_want_write(path.mnt); + if (error) + goto out_release; +- error = chown_common(path.dentry, path.mnt, user, group); ++ error = chown_common(path.dentry, path.mnt, user, group, NULL); + mnt_drop_write(path.mnt); + out_release: + path_put(&path); +@@ -730,7 +726,7 @@ SYSCALL_DEFINE5(fchownat, int, 
dfd, cons + error = mnt_want_write(path.mnt); + if (error) + goto out_release; +- error = chown_common(path.dentry, path.mnt, user, group); ++ error = chown_common(path.dentry, path.mnt, user, group, NULL); + mnt_drop_write(path.mnt); + out_release: + path_put(&path); +@@ -749,7 +745,7 @@ SYSCALL_DEFINE3(lchown, const char __use + error = mnt_want_write(path.mnt); + if (error) + goto out_release; +- error = chown_common(path.dentry, path.mnt, user, group); ++ error = chown_common(path.dentry, path.mnt, user, group, NULL); + mnt_drop_write(path.mnt); + out_release: + path_put(&path); +@@ -772,7 +768,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd + goto out_fput; + dentry = file->f_path.dentry; + audit_inode(NULL, dentry); +- error = chown_common(dentry, file->f_path.mnt, user, group); ++ error = chown_common(dentry, file->f_path.mnt, user, group, file); + mnt_drop_write(file->f_path.mnt); + out_fput: + fput(file); +--- a/fs/utimes.c ++++ b/fs/utimes.c +@@ -48,7 +48,8 @@ static bool nsec_valid(long nsec) + return nsec >= 0 && nsec <= 999999999; + } + +-static int utimes_common(struct path *path, struct timespec *times) ++static int utimes_common(struct path *path, struct timespec *times, ++ struct file *f) + { + int error; + struct iattr newattrs; +@@ -102,7 +103,7 @@ static int utimes_common(struct path *pa + } + } + mutex_lock(&inode->i_mutex); +- error = notify_change(path->dentry, path->mnt, &newattrs); ++ error = fnotify_change(path->dentry, path->mnt, &newattrs, f); + mutex_unlock(&inode->i_mutex); + + mnt_drop_write_and_out: +@@ -149,7 +150,7 @@ long do_utimes(int dfd, char __user *fil + if (!file) + goto out; + +- error = utimes_common(&file->f_path, times); ++ error = utimes_common(&file->f_path, times, file); + fput(file); + } else { + struct path path; +@@ -162,7 +163,7 @@ long do_utimes(int dfd, char __user *fil + if (error) + goto out; + +- error = utimes_common(&path, times); ++ error = utimes_common(&path, times, NULL); + path_put(&path); + } + +--- 
a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -367,13 +367,6 @@ struct iattr { + struct timespec ia_atime; + struct timespec ia_mtime; + struct timespec ia_ctime; +- +- /* +- * Not an attribute, but an auxilary info for filesystems wanting to +- * implement an ftruncate() like method. NOTE: filesystem should +- * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). +- */ +- struct file *ia_file; + }; + + /* +@@ -1280,6 +1273,7 @@ struct file_operations { + #define HAVE_FOP_OPEN_EXEC + int (*open_exec) (struct inode *); + int (*setlease)(struct file *, long, struct file_lock **); ++ int (*fsetattr)(struct file *, struct iattr *); + }; + + struct inode_operations { +@@ -1799,6 +1793,7 @@ extern int do_remount_sb(struct super_bl + extern sector_t bmap(struct inode *, sector_t); + #endif + extern int notify_change(struct dentry *, struct vfsmount *, struct iattr *); ++extern int fnotify_change(struct dentry *, struct vfsmount *, struct iattr *, struct file *); + extern int inode_permission(struct inode *, int); + extern int generic_permission(struct inode *, int, + int (*check_acl)(struct inode *, int)); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/remove_suid.diff b/src/patches/suse-2.6.27.25/patches.apparmor/remove_suid.diff new file mode 100644 index 0000000000..9c639fab93 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/remove_suid.diff @@ -0,0 +1,41 @@ +From: Andreas Gruenbacher +Subject: Pass struct path down to remove_suid and children + +Required by a later patch that adds a struct vfsmount parameter to +notify_change(). 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + + mm/filemap.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -1826,12 +1826,12 @@ int should_remove_suid(struct dentry *de + } + EXPORT_SYMBOL(should_remove_suid); + +-static int __remove_suid(struct dentry *dentry, int kill) ++static int __remove_suid(struct path *path, int kill) + { + struct iattr newattrs; + + newattrs.ia_valid = ATTR_FORCE | kill; +- return notify_change(dentry, &newattrs); ++ return notify_change(path->dentry, &newattrs); + } + + int file_remove_suid(struct file *file) +@@ -1846,7 +1846,7 @@ int file_remove_suid(struct file *file) + if (killpriv) + error = security_inode_killpriv(dentry); + if (!error && killsuid) +- error = __remove_suid(dentry, killsuid); ++ error = __remove_suid(&file->f_path, killsuid); + + return error; + } diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-create.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-create.diff new file mode 100644 index 0000000000..486d31c48a --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-create.diff @@ -0,0 +1,107 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_create LSM hook + +This is needed for computing pathnames in the AppArmor LSM. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/namei.c | 2 +- + include/linux/security.h | 9 ++++++--- + security/capability.c | 2 +- + security/security.c | 5 +++-- + security/selinux/hooks.c | 3 ++- + 5 files changed, 13 insertions(+), 8 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -1543,7 +1543,7 @@ int vfs_create(struct inode *dir, struct + return -EACCES; /* shouldn't it be ENOSYS? */ + mode &= S_IALLUGO; + mode |= S_IFREG; +- error = security_inode_create(dir, dentry, mode); ++ error = security_inode_create(dir, dentry, nd ? 
nd->path.mnt : NULL, mode); + if (error) + return error; + DQUOT_INIT(dir); +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -337,6 +337,7 @@ static inline void security_free_mnt_opt + * Check permission to create a regular file. + * @dir contains inode structure of the parent of the new file. + * @dentry contains the dentry structure for the file to be created. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * @mode contains the file mode of the file to be created. + * Return 0 if permission is granted. + * @inode_link: +@@ -1354,8 +1355,8 @@ struct security_operations { + void (*inode_free_security) (struct inode *inode); + int (*inode_init_security) (struct inode *inode, struct inode *dir, + char **name, void **value, size_t *len); +- int (*inode_create) (struct inode *dir, +- struct dentry *dentry, int mode); ++ int (*inode_create) (struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode); + int (*inode_link) (struct dentry *old_dentry, + struct inode *dir, struct dentry *new_dentry); + int (*inode_unlink) (struct inode *dir, struct dentry *dentry); +@@ -1622,7 +1623,8 @@ int security_inode_alloc(struct inode *i + void security_inode_free(struct inode *inode); + int security_inode_init_security(struct inode *inode, struct inode *dir, + char **name, void **value, size_t *len); +-int security_inode_create(struct inode *dir, struct dentry *dentry, int mode); ++int security_inode_create(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode); + int security_inode_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry); + int security_inode_unlink(struct inode *dir, struct dentry *dentry); +@@ -1968,6 +1970,7 @@ static inline int security_inode_init_se + + static inline int security_inode_create(struct inode *dir, + struct dentry *dentry, ++ struct vfsmount *mnt, + int mode) + { + return 0; +--- a/security/capability.c ++++ b/security/capability.c +@@ -155,7 
+155,7 @@ static int cap_inode_init_security(struc + } + + static int cap_inode_create(struct inode *inode, struct dentry *dentry, +- int mask) ++ struct vfsmount *mnt, int mask) + { + return 0; + } +--- a/security/security.c ++++ b/security/security.c +@@ -358,11 +358,12 @@ int security_inode_init_security(struct + } + EXPORT_SYMBOL(security_inode_init_security); + +-int security_inode_create(struct inode *dir, struct dentry *dentry, int mode) ++int security_inode_create(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode) + { + if (unlikely(IS_PRIVATE(dir))) + return 0; +- return security_ops->inode_create(dir, dentry, mode); ++ return security_ops->inode_create(dir, dentry, mnt, mode); + } + + int security_inode_link(struct dentry *old_dentry, struct inode *dir, +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2566,7 +2566,8 @@ static int selinux_inode_init_security(s + return 0; + } + +-static int selinux_inode_create(struct inode *dir, struct dentry *dentry, int mask) ++static int selinux_inode_create(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mask) + { + return may_create(dir, dentry, SECCLASS_FILE); + } diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-getxattr.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-getxattr.diff new file mode 100644 index 0000000000..6a8663ca2c --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-getxattr.diff @@ -0,0 +1,128 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_getxattr LSM hook + +This is needed for computing pathnames in the AppArmor LSM. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/xattr.c | 2 +- + include/linux/security.h | 11 +++++++---- + security/capability.c | 3 ++- + security/security.c | 5 +++-- + security/selinux/hooks.c | 3 ++- + security/smack/smack_lsm.c | 4 +++- + 6 files changed, 18 insertions(+), 10 deletions(-) + +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -141,7 +141,7 @@ vfs_getxattr(struct dentry *dentry, stru + if (error) + return error; + +- error = security_inode_getxattr(dentry, name); ++ error = security_inode_getxattr(dentry, mnt, name); + if (error) + return error; + +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -446,7 +446,7 @@ static inline void security_free_mnt_opt + * @value identified by @name for @dentry and @mnt. + * @inode_getxattr: + * Check permission before obtaining the extended attributes +- * identified by @name for @dentry. ++ * identified by @name for @dentry and @mnt. + * Return 0 if permission is granted. 
+ * @inode_listxattr: + * Check permission before obtaining the list of extended attribute +@@ -1400,7 +1400,8 @@ struct security_operations { + struct vfsmount *mnt, + const char *name, const void *value, + size_t size, int flags); +- int (*inode_getxattr) (struct dentry *dentry, const char *name); ++ int (*inode_getxattr) (struct dentry *dentry, struct vfsmount *mnt, ++ const char *name); + int (*inode_listxattr) (struct dentry *dentry); + int (*inode_removexattr) (struct dentry *dentry, const char *name); + int (*inode_need_killpriv) (struct dentry *dentry); +@@ -1676,7 +1677,8 @@ int security_inode_setxattr(struct dentr + void security_inode_post_setxattr(struct dentry *dentry, struct vfsmount *mnt, + const char *name, const void *value, + size_t size, int flags); +-int security_inode_getxattr(struct dentry *dentry, const char *name); ++int security_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name); + int security_inode_listxattr(struct dentry *dentry); + int security_inode_removexattr(struct dentry *dentry, const char *name); + int security_inode_need_killpriv(struct dentry *dentry); +@@ -2113,7 +2115,8 @@ static inline void security_inode_post_s + { } + + static inline int security_inode_getxattr(struct dentry *dentry, +- const char *name) ++ struct vfsmount *mnt, ++ const char *name) + { + return 0; + } +--- a/security/capability.c ++++ b/security/capability.c +@@ -241,7 +241,8 @@ static void cap_inode_post_setxattr(stru + { + } + +-static int cap_inode_getxattr(struct dentry *dentry, const char *name) ++static int cap_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name) + { + return 0; + } +--- a/security/security.c ++++ b/security/security.c +@@ -491,11 +491,12 @@ void security_inode_post_setxattr(struct + flags); + } + +-int security_inode_getxattr(struct dentry *dentry, const char *name) ++int security_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name) + { + if 
(unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_getxattr(dentry, name); ++ return security_ops->inode_getxattr(dentry, mnt, name); + } + + int security_inode_listxattr(struct dentry *dentry) +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2796,7 +2796,8 @@ static void selinux_inode_post_setxattr( + return; + } + +-static int selinux_inode_getxattr(struct dentry *dentry, const char *name) ++static int selinux_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name) + { + return dentry_has_perm(current, NULL, dentry, FILE__GETATTR); + } +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -673,11 +673,13 @@ static void smack_inode_post_setxattr(st + /* + * smack_inode_getxattr - Smack check on getxattr + * @dentry: the object ++ * @mnt: unused + * @name: unused + * + * Returns 0 if access is permitted, an error code otherwise + */ +-static int smack_inode_getxattr(struct dentry *dentry, const char *name) ++static int smack_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name) + { + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ); + } diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-link.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-link.diff new file mode 100644 index 0000000000..ddc4d888f0 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-link.diff @@ -0,0 +1,149 @@ +From: Tony Jones +Subject: Pass the struct vfsmounts to the inode_link LSM hook + +This is needed for computing pathnames in the AppArmor LSM. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/namei.c | 3 ++- + include/linux/security.h | 18 ++++++++++++------ + security/capability.c | 5 +++-- + security/security.c | 8 +++++--- + security/selinux/hooks.c | 9 +++++++-- + security/smack/smack_lsm.c | 5 +++-- + 6 files changed, 32 insertions(+), 16 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2437,7 +2437,8 @@ int vfs_link(struct dentry *old_dentry, + if (S_ISDIR(inode->i_mode)) + return -EPERM; + +- error = security_inode_link(old_dentry, dir, new_dentry); ++ error = security_inode_link(old_dentry, old_mnt, dir, new_dentry, ++ new_mnt); + if (error) + return error; + +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -343,8 +343,10 @@ static inline void security_free_mnt_opt + * @inode_link: + * Check permission before creating a new hard link to a file. + * @old_dentry contains the dentry structure for an existing link to the file. ++ * @old_mnt is the vfsmount corresponding to @old_dentry (may be NULL). + * @dir contains the inode structure of the parent directory of the new link. + * @new_dentry contains the dentry structure for the new link. ++ * @new_mnt is the vfsmount corresponding to @new_dentry (may be NULL). + * Return 0 if permission is granted. + * @inode_unlink: + * Check the permission to remove a hard link to a file. 
+@@ -1362,8 +1364,9 @@ struct security_operations { + char **name, void **value, size_t *len); + int (*inode_create) (struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, int mode); +- int (*inode_link) (struct dentry *old_dentry, +- struct inode *dir, struct dentry *new_dentry); ++ int (*inode_link) (struct dentry *old_dentry, struct vfsmount *old_mnt, ++ struct inode *dir, struct dentry *new_dentry, ++ struct vfsmount *new_mnt); + int (*inode_unlink) (struct inode *dir, struct dentry *dentry); + int (*inode_symlink) (struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, const char *old_name); +@@ -1632,8 +1635,9 @@ int security_inode_init_security(struct + char **name, void **value, size_t *len); + int security_inode_create(struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, int mode); +-int security_inode_link(struct dentry *old_dentry, struct inode *dir, +- struct dentry *new_dentry); ++int security_inode_link(struct dentry *old_dentry, struct vfsmount *old_mnt, ++ struct inode *dir, struct dentry *new_dentry, ++ struct vfsmount *new_mnt); + int security_inode_unlink(struct inode *dir, struct dentry *dentry); + int security_inode_symlink(struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, const char *old_name); +@@ -1987,8 +1991,10 @@ static inline int security_inode_create( + } + + static inline int security_inode_link(struct dentry *old_dentry, +- struct inode *dir, +- struct dentry *new_dentry) ++ struct vfsmount *old_mnt, ++ struct inode *dir, ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + return 0; + } +--- a/security/capability.c ++++ b/security/capability.c +@@ -160,8 +160,9 @@ static int cap_inode_create(struct inode + return 0; + } + +-static int cap_inode_link(struct dentry *old_dentry, struct inode *inode, +- struct dentry *new_dentry) ++static int cap_inode_link(struct dentry *old_dentry, struct vfsmount *old_mnt, ++ struct inode *inode, ++ struct dentry *new_dentry, struct 
vfsmount *new_mnt) + { + return 0; + } +--- a/security/security.c ++++ b/security/security.c +@@ -366,12 +366,14 @@ int security_inode_create(struct inode * + return security_ops->inode_create(dir, dentry, mnt, mode); + } + +-int security_inode_link(struct dentry *old_dentry, struct inode *dir, +- struct dentry *new_dentry) ++int security_inode_link(struct dentry *old_dentry, struct vfsmount *old_mnt, ++ struct inode *dir, struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + if (unlikely(IS_PRIVATE(old_dentry->d_inode))) + return 0; +- return security_ops->inode_link(old_dentry, dir, new_dentry); ++ return security_ops->inode_link(old_dentry, old_mnt, dir, ++ new_dentry, new_mnt); + } + + int security_inode_unlink(struct inode *dir, struct dentry *dentry) +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2572,11 +2572,16 @@ static int selinux_inode_create(struct i + return may_create(dir, dentry, SECCLASS_FILE); + } + +-static int selinux_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) ++static int selinux_inode_link(struct dentry *old_dentry, ++ struct vfsmount *old_mnt, ++ struct inode *dir, ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + int rc; + +- rc = secondary_ops->inode_link(old_dentry, dir, new_dentry); ++ rc = secondary_ops->inode_link(old_dentry, old_mnt, dir, new_dentry, ++ new_mnt); + if (rc) + return rc; + return may_link(dir, old_dentry, MAY_LINK); +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -432,8 +432,9 @@ static int smack_inode_init_security(str + * + * Returns 0 if access is permitted, an error code otherwise + */ +-static int smack_inode_link(struct dentry *old_dentry, struct inode *dir, +- struct dentry *new_dentry) ++static int smack_inode_link(struct dentry *old_dentry, struct vfsmount *old_mnt, ++ struct inode *dir, ++ struct dentry *new_dentry, struct vfsmount *new_mnt) + { + int rc; + char *isp; diff --git 
a/src/patches/suse-2.6.27.25/patches.apparmor/security-listxattr.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-listxattr.diff new file mode 100644 index 0000000000..b4b5efe66d --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-listxattr.diff @@ -0,0 +1,105 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_listxattr LSM hook + +This is needed for computing pathnames in the AppArmor LSM. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/xattr.c | 2 +- + include/linux/security.h | 9 +++++---- + security/capability.c | 2 +- + security/security.c | 4 ++-- + security/selinux/hooks.c | 2 +- + 5 files changed, 10 insertions(+), 9 deletions(-) + +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -174,7 +174,7 @@ vfs_listxattr(struct dentry *dentry, str + struct inode *inode = dentry->d_inode; + ssize_t error; + +- error = security_inode_listxattr(dentry); ++ error = security_inode_listxattr(dentry, mnt); + if (error) + return error; + error = -EOPNOTSUPP; +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -450,7 +450,7 @@ static inline void security_free_mnt_opt + * Return 0 if permission is granted. + * @inode_listxattr: + * Check permission before obtaining the list of extended attribute +- * names for @dentry. ++ * names for @dentry and @mnt. + * Return 0 if permission is granted. 
+ * @inode_removexattr: + * Check permission before removing the extended attribute +@@ -1402,7 +1402,7 @@ struct security_operations { + size_t size, int flags); + int (*inode_getxattr) (struct dentry *dentry, struct vfsmount *mnt, + const char *name); +- int (*inode_listxattr) (struct dentry *dentry); ++ int (*inode_listxattr) (struct dentry *dentry, struct vfsmount *mnt); + int (*inode_removexattr) (struct dentry *dentry, const char *name); + int (*inode_need_killpriv) (struct dentry *dentry); + int (*inode_killpriv) (struct dentry *dentry); +@@ -1679,7 +1679,7 @@ void security_inode_post_setxattr(struct + size_t size, int flags); + int security_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, + const char *name); +-int security_inode_listxattr(struct dentry *dentry); ++int security_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt); + int security_inode_removexattr(struct dentry *dentry, const char *name); + int security_inode_need_killpriv(struct dentry *dentry); + int security_inode_killpriv(struct dentry *dentry); +@@ -2121,7 +2121,8 @@ static inline int security_inode_getxatt + return 0; + } + +-static inline int security_inode_listxattr(struct dentry *dentry) ++static inline int security_inode_listxattr(struct dentry *dentry, ++ struct vfsmount *mnt) + { + return 0; + } +--- a/security/capability.c ++++ b/security/capability.c +@@ -247,7 +247,7 @@ static int cap_inode_getxattr(struct den + return 0; + } + +-static int cap_inode_listxattr(struct dentry *dentry) ++static int cap_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt) + { + return 0; + } +--- a/security/security.c ++++ b/security/security.c +@@ -499,11 +499,11 @@ int security_inode_getxattr(struct dentr + return security_ops->inode_getxattr(dentry, mnt, name); + } + +-int security_inode_listxattr(struct dentry *dentry) ++int security_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return 
security_ops->inode_listxattr(dentry); ++ return security_ops->inode_listxattr(dentry, mnt); + } + + int security_inode_removexattr(struct dentry *dentry, const char *name) +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2802,7 +2802,7 @@ static int selinux_inode_getxattr(struct + return dentry_has_perm(current, NULL, dentry, FILE__GETATTR); + } + +-static int selinux_inode_listxattr(struct dentry *dentry) ++static int selinux_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt) + { + return dentry_has_perm(current, NULL, dentry, FILE__GETATTR); + } diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-mkdir.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-mkdir.diff new file mode 100644 index 0000000000..e9a8cdce81 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-mkdir.diff @@ -0,0 +1,106 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_mkdir LSM hook + +This is needed for computing pathnames in the AppArmor LSM. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/namei.c | 2 +- + include/linux/security.h | 8 ++++++-- + security/capability.c | 2 +- + security/security.c | 5 +++-- + security/selinux/hooks.c | 3 ++- + 5 files changed, 13 insertions(+), 7 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2089,7 +2089,7 @@ int vfs_mkdir(struct inode *dir, struct + return -EPERM; + + mode &= (S_IRWXUGO|S_ISVTX); +- error = security_inode_mkdir(dir, dentry, mode); ++ error = security_inode_mkdir(dir, dentry, mnt, mode); + if (error) + return error; + +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -362,6 +362,7 @@ static inline void security_free_mnt_opt + * associated with inode strcture @dir. + * @dir containst the inode structure of parent of the directory to be created. + * @dentry contains the dentry structure of new directory. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). 
+ * @mode contains the mode of new directory. + * Return 0 if permission is granted. + * @inode_rmdir: +@@ -1363,7 +1364,8 @@ struct security_operations { + int (*inode_unlink) (struct inode *dir, struct dentry *dentry); + int (*inode_symlink) (struct inode *dir, + struct dentry *dentry, const char *old_name); +- int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, int mode); ++ int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode); + int (*inode_rmdir) (struct inode *dir, struct dentry *dentry); + int (*inode_mknod) (struct inode *dir, struct dentry *dentry, + int mode, dev_t dev); +@@ -1632,7 +1634,8 @@ int security_inode_link(struct dentry *o + int security_inode_unlink(struct inode *dir, struct dentry *dentry); + int security_inode_symlink(struct inode *dir, struct dentry *dentry, + const char *old_name); +-int security_inode_mkdir(struct inode *dir, struct dentry *dentry, int mode); ++int security_inode_mkdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode); + int security_inode_rmdir(struct inode *dir, struct dentry *dentry); + int security_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev); + int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, +@@ -2001,6 +2004,7 @@ static inline int security_inode_symlink + + static inline int security_inode_mkdir(struct inode *dir, + struct dentry *dentry, ++ struct vfsmount *mnt, + int mode) + { + return 0; +--- a/security/capability.c ++++ b/security/capability.c +@@ -178,7 +178,7 @@ static int cap_inode_symlink(struct inod + } + + static int cap_inode_mkdir(struct inode *inode, struct dentry *dentry, +- int mask) ++ struct vfsmount *mnt, int mask) + { + return 0; + } +--- a/security/security.c ++++ b/security/security.c +@@ -389,11 +389,12 @@ int security_inode_symlink(struct inode + return security_ops->inode_symlink(dir, dentry, old_name); + } + +-int security_inode_mkdir(struct inode *dir, 
struct dentry *dentry, int mode) ++int security_inode_mkdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode) + { + if (unlikely(IS_PRIVATE(dir))) + return 0; +- return security_ops->inode_mkdir(dir, dentry, mode); ++ return security_ops->inode_mkdir(dir, dentry, mnt, mode); + } + + int security_inode_rmdir(struct inode *dir, struct dentry *dentry) +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2597,7 +2597,8 @@ static int selinux_inode_symlink(struct + return may_create(dir, dentry, SECCLASS_LNK_FILE); + } + +-static int selinux_inode_mkdir(struct inode *dir, struct dentry *dentry, int mask) ++static int selinux_inode_mkdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mask) + { + return may_create(dir, dentry, SECCLASS_DIR); + } diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-mknod.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-mknod.diff new file mode 100644 index 0000000000..6bcd0fc7f6 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-mknod.diff @@ -0,0 +1,124 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_mknod LSM hook + +This is needed for computing pathnames in the AppArmor LSM. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/namei.c | 6 +++--- + include/linux/security.h | 7 +++++-- + security/capability.c | 2 +- + security/security.c | 5 +++-- + security/selinux/hooks.c | 5 +++-- + 5 files changed, 15 insertions(+), 10 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -1994,7 +1994,7 @@ int vfs_mknod(struct inode *dir, struct + if (error) + return error; + +- error = security_inode_mknod(dir, dentry, mode, dev); ++ error = security_inode_mknod(dir, dentry, mnt, mode, dev); + if (error) + return error; + +@@ -2056,11 +2056,11 @@ asmlinkage long sys_mknodat(int dfd, con + break; + case S_IFCHR: case S_IFBLK: + error = vfs_mknod(nd.path.dentry->d_inode, dentry, +- nd.path, mode, new_decode_dev(dev)); ++ nd.path.mnt, mode, new_decode_dev(dev)); + break; + case S_IFIFO: case S_IFSOCK: + error = vfs_mknod(nd.path.dentry->d_inode, dentry, +- nd.path, mode, 0); ++ nd.path.mnt, mode, 0); + break; + } + mnt_drop_write(nd.path.mnt); +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -377,6 +377,7 @@ static inline void security_free_mnt_opt + * and not this hook. + * @dir contains the inode structure of parent of the new file. + * @dentry contains the dentry structure of the new file. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * @mode contains the mode of the new file. + * @dev contains the device number. + * Return 0 if permission is granted. 
+@@ -1368,7 +1369,7 @@ struct security_operations { + struct vfsmount *mnt, int mode); + int (*inode_rmdir) (struct inode *dir, struct dentry *dentry); + int (*inode_mknod) (struct inode *dir, struct dentry *dentry, +- int mode, dev_t dev); ++ struct vfsmount *mnt, int mode, dev_t dev); + int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); + int (*inode_readlink) (struct dentry *dentry); +@@ -1637,7 +1638,8 @@ int security_inode_symlink(struct inode + int security_inode_mkdir(struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, int mode); + int security_inode_rmdir(struct inode *dir, struct dentry *dentry); +-int security_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev); ++int security_inode_mknod(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode, dev_t dev); + int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); + int security_inode_readlink(struct dentry *dentry); +@@ -2018,6 +2020,7 @@ static inline int security_inode_rmdir(s + + static inline int security_inode_mknod(struct inode *dir, + struct dentry *dentry, ++ struct vfsmount *mnt, + int mode, dev_t dev) + { + return 0; +--- a/security/capability.c ++++ b/security/capability.c +@@ -189,7 +189,7 @@ static int cap_inode_rmdir(struct inode + } + + static int cap_inode_mknod(struct inode *inode, struct dentry *dentry, +- int mode, dev_t dev) ++ struct vfsmount *mnt, int mode, dev_t dev) + { + return 0; + } +--- a/security/security.c ++++ b/security/security.c +@@ -404,11 +404,12 @@ int security_inode_rmdir(struct inode *d + return security_ops->inode_rmdir(dir, dentry); + } + +-int security_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) ++int security_inode_mknod(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode, dev_t dev) + { + if 
(unlikely(IS_PRIVATE(dir))) + return 0; +- return security_ops->inode_mknod(dir, dentry, mode, dev); ++ return security_ops->inode_mknod(dir, dentry, mnt, mode, dev); + } + + int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2608,11 +2608,12 @@ static int selinux_inode_rmdir(struct in + return may_link(dir, dentry, MAY_RMDIR); + } + +-static int selinux_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) ++static int selinux_inode_mknod(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, int mode, dev_t dev) + { + int rc; + +- rc = secondary_ops->inode_mknod(dir, dentry, mode, dev); ++ rc = secondary_ops->inode_mknod(dir, dentry, mnt, mode, dev); + if (rc) + return rc; + diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-readlink.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-readlink.diff new file mode 100644 index 0000000000..d805c07a09 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-readlink.diff @@ -0,0 +1,104 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_readlink LSM hook + +This is needed for computing pathnames in the AppArmor LSM. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/stat.c | 2 +- + include/linux/security.h | 8 +++++--- + security/capability.c | 2 +- + security/security.c | 4 ++-- + security/selinux/hooks.c | 2 +- + 5 files changed, 10 insertions(+), 8 deletions(-) + +--- a/fs/stat.c ++++ b/fs/stat.c +@@ -308,7 +308,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, co + + error = -EINVAL; + if (inode->i_op && inode->i_op->readlink) { +- error = security_inode_readlink(path.dentry); ++ error = security_inode_readlink(path.dentry, path.mnt); + if (!error) { + touch_atime(path.mnt, path.dentry); + error = inode->i_op->readlink(path.dentry, +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -392,6 +392,7 @@ static inline void security_free_mnt_opt + * @inode_readlink: + * Check the permission to read the symbolic link. + * @dentry contains the dentry structure for the file link. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * Return 0 if permission is granted. + * @inode_follow_link: + * Check permission to follow a symbolic link when looking up a pathname. 
+@@ -1373,7 +1374,7 @@ struct security_operations { + struct vfsmount *mnt, int mode, dev_t dev); + int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); +- int (*inode_readlink) (struct dentry *dentry); ++ int (*inode_readlink) (struct dentry *dentry, struct vfsmount *mnt); + int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); + int (*inode_permission) (struct inode *inode, int mask); + int (*inode_setattr) (struct dentry *dentry, struct vfsmount *, +@@ -1643,7 +1644,7 @@ int security_inode_mknod(struct inode *d + struct vfsmount *mnt, int mode, dev_t dev); + int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); +-int security_inode_readlink(struct dentry *dentry); ++int security_inode_readlink(struct dentry *dentry, struct vfsmount *mnt); + int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); + int security_inode_permission(struct inode *inode, int mask); + int security_inode_setattr(struct dentry *dentry, struct vfsmount *mnt, +@@ -2036,7 +2037,8 @@ static inline int security_inode_rename( + return 0; + } + +-static inline int security_inode_readlink(struct dentry *dentry) ++static inline int security_inode_readlink(struct dentry *dentry, ++ struct vfsmount *mnt) + { + return 0; + } +--- a/security/capability.c ++++ b/security/capability.c +@@ -200,7 +200,7 @@ static int cap_inode_rename(struct inode + return 0; + } + +-static int cap_inode_readlink(struct dentry *dentry) ++static int cap_inode_readlink(struct dentry *dentry, struct vfsmount *mnt) + { + return 0; + } +--- a/security/security.c ++++ b/security/security.c +@@ -422,11 +422,11 @@ int security_inode_rename(struct inode * + new_dir, new_dentry); + } + +-int security_inode_readlink(struct dentry *dentry) ++int security_inode_readlink(struct dentry *dentry, struct vfsmount *mnt) + { + if 
(unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_readlink(dentry); ++ return security_ops->inode_readlink(dentry, mnt); + } + + int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd) +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2627,7 +2627,7 @@ static int selinux_inode_rename(struct i + return may_rename(old_inode, old_dentry, new_inode, new_dentry); + } + +-static int selinux_inode_readlink(struct dentry *dentry) ++static int selinux_inode_readlink(struct dentry *dentry, struct vfsmount *mnt) + { + return dentry_has_perm(current, NULL, dentry, FILE__READ); + } diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-removexattr.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-removexattr.diff new file mode 100644 index 0000000000..54ecb70de7 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-removexattr.diff @@ -0,0 +1,143 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_removexattr LSM hook + +This is needed for computing pathnames in the AppArmor LSM. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/xattr.c | 2 +- + include/linux/security.h | 14 +++++++++----- + security/commoncap.c | 3 ++- + security/security.c | 5 +++-- + security/selinux/hooks.c | 3 ++- + security/smack/smack_lsm.c | 6 ++++-- + 6 files changed, 21 insertions(+), 12 deletions(-) + +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -202,7 +202,7 @@ vfs_removexattr(struct dentry *dentry, s + if (error) + return error; + +- error = security_inode_removexattr(dentry, name); ++ error = security_inode_removexattr(dentry, mnt, name); + if (error) + return error; + +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -57,7 +57,8 @@ extern int cap_bprm_secureexec(struct li + extern int cap_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, + const char *name, const void *value, size_t size, + int flags); +-extern int cap_inode_removexattr(struct dentry *dentry, const char *name); ++extern int cap_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name); + extern int cap_inode_need_killpriv(struct dentry *dentry); + extern int cap_inode_killpriv(struct dentry *dentry); + extern int cap_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); +@@ -1403,7 +1404,8 @@ struct security_operations { + int (*inode_getxattr) (struct dentry *dentry, struct vfsmount *mnt, + const char *name); + int (*inode_listxattr) (struct dentry *dentry, struct vfsmount *mnt); +- int (*inode_removexattr) (struct dentry *dentry, const char *name); ++ int (*inode_removexattr) (struct dentry *dentry, struct vfsmount *mnt, ++ const char *name); + int (*inode_need_killpriv) (struct dentry *dentry); + int (*inode_killpriv) (struct dentry *dentry); + int (*inode_getsecurity) (const struct inode *inode, const char *name, void **buffer, bool alloc); +@@ -1680,7 +1682,8 @@ void security_inode_post_setxattr(struct + int security_inode_getxattr(struct dentry *dentry, struct 
vfsmount *mnt, + const char *name); + int security_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt); +-int security_inode_removexattr(struct dentry *dentry, const char *name); ++int security_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name); + int security_inode_need_killpriv(struct dentry *dentry); + int security_inode_killpriv(struct dentry *dentry); + int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc); +@@ -2128,9 +2131,10 @@ static inline int security_inode_listxat + } + + static inline int security_inode_removexattr(struct dentry *dentry, +- const char *name) ++ struct vfsmount *mnt, ++ const char *name) + { +- return cap_inode_removexattr(dentry, name); ++ return cap_inode_removexattr(dentry, mnt, name); + } + + static inline int security_inode_need_killpriv(struct dentry *dentry) +--- a/security/commoncap.c ++++ b/security/commoncap.c +@@ -429,7 +429,8 @@ int cap_inode_setxattr(struct dentry *de + return 0; + } + +-int cap_inode_removexattr(struct dentry *dentry, const char *name) ++int cap_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name) + { + if (!strcmp(name, XATTR_NAME_CAPS)) { + if (!capable(CAP_SETFCAP)) +--- a/security/security.c ++++ b/security/security.c +@@ -506,11 +506,12 @@ int security_inode_listxattr(struct dent + return security_ops->inode_listxattr(dentry, mnt); + } + +-int security_inode_removexattr(struct dentry *dentry, const char *name) ++int security_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_removexattr(dentry, name); ++ return security_ops->inode_removexattr(dentry, mnt, name); + } + + int security_inode_need_killpriv(struct dentry *dentry) +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2807,7 +2807,8 @@ static int selinux_inode_listxattr(struc + return 
dentry_has_perm(current, NULL, dentry, FILE__GETATTR); + } + +-static int selinux_inode_removexattr(struct dentry *dentry, const char *name) ++static int selinux_inode_removexattr(struct dentry *dentry, ++ struct vfsmount *mnt, const char *name) + { + if (strcmp(name, XATTR_NAME_SELINUX)) + return selinux_inode_setotherxattr(dentry, name); +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -687,13 +687,15 @@ static int smack_inode_getxattr(struct d + /* + * smack_inode_removexattr - Smack check on removexattr + * @dentry: the object ++ * @mnt: unused + * @name: name of the attribute + * + * Removing the Smack attribute requires CAP_MAC_ADMIN + * + * Returns 0 if access is permitted, an error code otherwise + */ +-static int smack_inode_removexattr(struct dentry *dentry, const char *name) ++static int smack_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name) + { + int rc = 0; + +@@ -703,7 +705,7 @@ static int smack_inode_removexattr(struc + if (!capable(CAP_MAC_ADMIN)) + rc = -EPERM; + } else +- rc = cap_inode_removexattr(dentry, name); ++ rc = cap_inode_removexattr(dentry, mnt, name); + + if (rc == 0) + rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-rename.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-rename.diff new file mode 100644 index 0000000000..b60915c3e9 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-rename.diff @@ -0,0 +1,160 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_rename LSM hook + +This is needed for computing pathnames in the AppArmor LSM. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/namei.c | 6 ++++-- + include/linux/security.h | 13 ++++++++++--- + security/capability.c | 3 ++- + security/security.c | 7 ++++--- + security/selinux/hooks.c | 8 ++++++-- + security/smack/smack_lsm.c | 6 +++++- + 6 files changed, 31 insertions(+), 12 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2563,7 +2563,8 @@ static int vfs_rename_dir(struct inode * + return error; + } + +- error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); ++ error = security_inode_rename(old_dir, old_dentry, old_mnt, ++ new_dir, new_dentry, new_mnt); + if (error) + return error; + +@@ -2597,7 +2598,8 @@ static int vfs_rename_other(struct inode + struct inode *target; + int error; + +- error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); ++ error = security_inode_rename(old_dir, old_dentry, old_mnt, ++ new_dir, new_dentry, new_mnt); + if (error) + return error; + +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -390,8 +390,10 @@ static inline void security_free_mnt_opt + * Check for permission to rename a file or directory. + * @old_dir contains the inode structure for parent of the old link. + * @old_dentry contains the dentry structure of the old link. ++ * @old_mnt is the vfsmount corresponding to @old_dentry (may be NULL). + * @new_dir contains the inode structure for parent of the new link. + * @new_dentry contains the dentry structure of the new link. ++ * @new_mnt is the vfsmount corresponding to @new_dentry (may be NULL). + * Return 0 if permission is granted. + * @inode_readlink: + * Check the permission to read the symbolic link. 
+@@ -1380,7 +1382,9 @@ struct security_operations { + int (*inode_mknod) (struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, int mode, dev_t dev); + int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry); ++ struct vfsmount *old_mnt, ++ struct inode *new_dir, struct dentry *new_dentry, ++ struct vfsmount *new_mnt); + int (*inode_readlink) (struct dentry *dentry, struct vfsmount *mnt); + int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); + int (*inode_permission) (struct inode *inode, int mask); +@@ -1653,7 +1657,8 @@ int security_inode_rmdir(struct inode *d + int security_inode_mknod(struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, int mode, dev_t dev); + int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry); ++ struct vfsmount *old_mnt, struct inode *new_dir, ++ struct dentry *new_dentry, struct vfsmount *new_mnt); + int security_inode_readlink(struct dentry *dentry, struct vfsmount *mnt); + int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); + int security_inode_permission(struct inode *inode, int mask); +@@ -2045,8 +2050,10 @@ static inline int security_inode_mknod(s + + static inline int security_inode_rename(struct inode *old_dir, + struct dentry *old_dentry, ++ struct vfsmount *old_mnt, + struct inode *new_dir, +- struct dentry *new_dentry) ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + return 0; + } +--- a/security/capability.c ++++ b/security/capability.c +@@ -198,7 +198,8 @@ static int cap_inode_mknod(struct inode + } + + static int cap_inode_rename(struct inode *old_inode, struct dentry *old_dentry, +- struct inode *new_inode, struct dentry *new_dentry) ++ struct vfsmount *old_mnt, struct inode *new_inode, ++ struct dentry *new_dentry, struct vfsmount *new_mnt) + { + return 0; + } +--- a/security/security.c ++++ 
b/security/security.c +@@ -417,13 +417,14 @@ int security_inode_mknod(struct inode *d + } + + int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct vfsmount *old_mnt, struct inode *new_dir, ++ struct dentry *new_dentry, struct vfsmount *new_mnt) + { + if (unlikely(IS_PRIVATE(old_dentry->d_inode) || + (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) + return 0; +- return security_ops->inode_rename(old_dir, old_dentry, +- new_dir, new_dentry); ++ return security_ops->inode_rename(old_dir, old_dentry, old_mnt, ++ new_dir, new_dentry, new_mnt); + } + + int security_inode_readlink(struct dentry *dentry, struct vfsmount *mnt) +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2628,8 +2628,12 @@ static int selinux_inode_mknod(struct in + return may_create(dir, dentry, inode_mode_to_security_class(mode)); + } + +-static int selinux_inode_rename(struct inode *old_inode, struct dentry *old_dentry, +- struct inode *new_inode, struct dentry *new_dentry) ++static int selinux_inode_rename(struct inode *old_inode, ++ struct dentry *old_dentry, ++ struct vfsmount *old_mnt, ++ struct inode *new_inode, ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + return may_rename(old_inode, old_dentry, new_inode, new_dentry); + } +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -509,8 +509,10 @@ static int smack_inode_rmdir(struct inod + * smack_inode_rename - Smack check on rename + * @old_inode: the old directory + * @old_dentry: unused ++ * @old_mnt: unused + * @new_inode: the new directory + * @new_dentry: unused ++ * @new_mnt: unused + * + * Read and write access is required on both the old and + * new directories. 
+@@ -519,8 +521,10 @@ static int smack_inode_rmdir(struct inod + */ + static int smack_inode_rename(struct inode *old_inode, + struct dentry *old_dentry, ++ struct vfsmount *old_mnt, + struct inode *new_inode, +- struct dentry *new_dentry) ++ struct dentry *new_dentry, ++ struct vfsmount *new_mnt) + { + int rc; + char *isp; diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-rmdir.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-rmdir.diff new file mode 100644 index 0000000000..683078fcce --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-rmdir.diff @@ -0,0 +1,127 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_rmdir LSM hook + +This is needed for computing pathnames in the AppArmor LSM. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/namei.c | 2 +- + include/linux/security.h | 10 +++++++--- + security/capability.c | 3 ++- + security/security.c | 5 +++-- + security/selinux/hooks.c | 3 ++- + security/smack/smack_lsm.c | 4 +++- + 6 files changed, 18 insertions(+), 9 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2184,7 +2184,7 @@ int vfs_rmdir(struct inode *dir, struct + if (d_mountpoint(dentry)) + error = -EBUSY; + else { +- error = security_inode_rmdir(dir, dentry); ++ error = security_inode_rmdir(dir, dentry, mnt); + if (!error) { + error = dir->i_op->rmdir(dir, dentry); + if (!error) +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -372,6 +372,7 @@ static inline void security_free_mnt_opt + * Check the permission to remove a directory. + * @dir contains the inode structure of parent of the directory to be removed. + * @dentry contains the dentry structure of directory to be removed. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * Return 0 if permission is granted. 
+ * @inode_mknod: + * Check permissions when creating a special file (or a socket or a fifo +@@ -1372,7 +1373,8 @@ struct security_operations { + struct vfsmount *mnt, const char *old_name); + int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, int mode); +- int (*inode_rmdir) (struct inode *dir, struct dentry *dentry); ++ int (*inode_rmdir) (struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt); + int (*inode_mknod) (struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, int mode, dev_t dev); + int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, +@@ -1643,7 +1645,8 @@ int security_inode_symlink(struct inode + struct vfsmount *mnt, const char *old_name); + int security_inode_mkdir(struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, int mode); +-int security_inode_rmdir(struct inode *dir, struct dentry *dentry); ++int security_inode_rmdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt); + int security_inode_mknod(struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, int mode, dev_t dev); + int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, +@@ -2022,7 +2025,8 @@ static inline int security_inode_mkdir(s + } + + static inline int security_inode_rmdir(struct inode *dir, +- struct dentry *dentry) ++ struct dentry *dentry, ++ struct vfsmount *mnt) + { + return 0; + } +--- a/security/capability.c ++++ b/security/capability.c +@@ -184,7 +184,8 @@ static int cap_inode_mkdir(struct inode + return 0; + } + +-static int cap_inode_rmdir(struct inode *inode, struct dentry *dentry) ++static int cap_inode_rmdir(struct inode *inode, struct dentry *dentry, ++ struct vfsmount *mnt) + { + return 0; + } +--- a/security/security.c ++++ b/security/security.c +@@ -399,11 +399,12 @@ int security_inode_mkdir(struct inode *d + return security_ops->inode_mkdir(dir, dentry, mnt, mode); + } + +-int security_inode_rmdir(struct inode *dir, struct 
dentry *dentry) ++int security_inode_rmdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_rmdir(dir, dentry); ++ return security_ops->inode_rmdir(dir, dentry, mnt); + } + + int security_inode_mknod(struct inode *dir, struct dentry *dentry, +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2609,7 +2609,8 @@ static int selinux_inode_mkdir(struct in + return may_create(dir, dentry, SECCLASS_DIR); + } + +-static int selinux_inode_rmdir(struct inode *dir, struct dentry *dentry) ++static int selinux_inode_rmdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) + { + return may_link(dir, dentry, MAY_RMDIR); + } +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -480,11 +480,13 @@ static int smack_inode_unlink(struct ino + * smack_inode_rmdir - Smack check on directory deletion + * @dir: containing directory object + * @dentry: directory to unlink ++ * @mnt: vfsmount @dentry to unlink + * + * Returns 0 if current can write the containing directory + * and the directory, error code otherwise + */ +-static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry) ++static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) + { + int rc; + diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-setattr.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-setattr.diff new file mode 100644 index 0000000000..601e9570a4 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-setattr.diff @@ -0,0 +1,146 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_setattr LSM hook + +This is needed for computing pathnames in the AppArmor LSM. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/attr.c | 4 ++-- + fs/fat/file.c | 2 +- + include/linux/security.h | 10 +++++++--- + security/capability.c | 3 ++- + security/security.c | 5 +++-- + security/selinux/hooks.c | 5 +++-- + security/smack/smack_lsm.c | 3 ++- + 7 files changed, 20 insertions(+), 12 deletions(-) + +--- a/fs/attr.c ++++ b/fs/attr.c +@@ -164,13 +164,13 @@ int notify_change(struct dentry *dentry, + down_write(&dentry->d_inode->i_alloc_sem); + + if (inode->i_op && inode->i_op->setattr) { +- error = security_inode_setattr(dentry, attr); ++ error = security_inode_setattr(dentry, mnt, attr); + if (!error) + error = inode->i_op->setattr(dentry, attr); + } else { + error = inode_change_ok(inode, attr); + if (!error) +- error = security_inode_setattr(dentry, attr); ++ error = security_inode_setattr(dentry, mnt, attr); + if (!error) { + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) +--- a/fs/fat/file.c ++++ b/fs/fat/file.c +@@ -98,7 +98,7 @@ int fat_generic_ioctl(struct inode *inod + * out the RO attribute for checking by the security + * module, just because it maps to a file mode. + */ +- err = security_inode_setattr(filp->f_path.dentry, &ia); ++ err = security_inode_setattr(filp->f_path.dentry, filp->f_path.mnt, &ia); + if (err) + goto up; + +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -412,6 +412,7 @@ static inline void security_free_mnt_opt + * file attributes change (such as when a file is truncated, chown/chmod + * operations, transferring disk quotas, etc). + * @dentry contains the dentry structure for the file. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * @attr is the iattr structure containing the new file attributes. + * Return 0 if permission is granted. 
+ * @inode_getattr: +@@ -1371,7 +1372,8 @@ struct security_operations { + int (*inode_readlink) (struct dentry *dentry); + int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); + int (*inode_permission) (struct inode *inode, int mask); +- int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); ++ int (*inode_setattr) (struct dentry *dentry, struct vfsmount *, ++ struct iattr *attr); + int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); + void (*inode_delete) (struct inode *inode); + int (*inode_setxattr) (struct dentry *dentry, const char *name, +@@ -1638,7 +1640,8 @@ int security_inode_rename(struct inode * + int security_inode_readlink(struct dentry *dentry); + int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); + int security_inode_permission(struct inode *inode, int mask); +-int security_inode_setattr(struct dentry *dentry, struct iattr *attr); ++int security_inode_setattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *attr); + int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); + void security_inode_delete(struct inode *inode); + int security_inode_setxattr(struct dentry *dentry, const char *name, +@@ -2041,7 +2044,8 @@ static inline int security_inode_permiss + } + + static inline int security_inode_setattr(struct dentry *dentry, +- struct iattr *attr) ++ struct vfsmount *mnt, ++ struct iattr *attr) + { + return 0; + } +--- a/security/capability.c ++++ b/security/capability.c +@@ -216,7 +216,8 @@ static int cap_inode_permission(struct i + return 0; + } + +-static int cap_inode_setattr(struct dentry *dentry, struct iattr *iattr) ++static int cap_inode_setattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *iattr) + { + return 0; + } +--- a/security/security.c ++++ b/security/security.c +@@ -441,11 +441,12 @@ int security_inode_permission(struct ino + return security_ops->inode_permission(inode, mask); + } + +-int security_inode_setattr(struct 
dentry *dentry, struct iattr *attr) ++int security_inode_setattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *attr) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_setattr(dentry, attr); ++ return security_ops->inode_setattr(dentry, mnt, attr); + } + EXPORT_SYMBOL_GPL(security_inode_setattr); + +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2656,11 +2656,12 @@ static int selinux_inode_permission(stru + open_file_mask_to_av(inode->i_mode, mask), NULL); + } + +-static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) ++static int selinux_inode_setattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *iattr) + { + int rc; + +- rc = secondary_ops->inode_setattr(dentry, iattr); ++ rc = secondary_ops->inode_setattr(dentry, mnt, iattr); + if (rc) + return rc; + +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -559,7 +559,8 @@ static int smack_inode_permission(struct + * + * Returns 0 if access is permitted, an error code otherwise + */ +-static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr) ++static int smack_inode_setattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *iattr) + { + /* + * Need to allow for clearing the setuid bit. diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-setxattr.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-setxattr.diff new file mode 100644 index 0000000000..87db517484 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-setxattr.diff @@ -0,0 +1,256 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_setxattr LSM hook + +This is needed for computing pathnames in the AppArmor LSM. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/xattr.c | 4 ++-- + include/linux/security.h | 41 ++++++++++++++++++++++++++--------------- + security/capability.c | 3 ++- + security/commoncap.c | 5 +++-- + security/security.c | 16 ++++++++++------ + security/selinux/hooks.c | 8 +++++--- + security/smack/smack_lsm.c | 12 ++++++++---- + 7 files changed, 56 insertions(+), 33 deletions(-) + +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -78,7 +78,7 @@ vfs_setxattr(struct dentry *dentry, stru + return error; + + mutex_lock(&inode->i_mutex); +- error = security_inode_setxattr(dentry, name, value, size, flags); ++ error = security_inode_setxattr(dentry, mnt, name, value, size, flags); + if (error) + goto out; + error = -EOPNOTSUPP; +@@ -86,7 +86,7 @@ vfs_setxattr(struct dentry *dentry, stru + error = inode->i_op->setxattr(dentry, name, value, size, flags); + if (!error) { + fsnotify_xattr(dentry); +- security_inode_post_setxattr(dentry, name, value, ++ security_inode_post_setxattr(dentry, mnt, name, value, + size, flags); + } + } else if (!strncmp(name, XATTR_SECURITY_PREFIX, +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -54,8 +54,9 @@ extern void cap_capset_set(struct task_s + extern int cap_bprm_set_security(struct linux_binprm *bprm); + extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); + extern int cap_bprm_secureexec(struct linux_binprm *bprm); +-extern int cap_inode_setxattr(struct dentry *dentry, const char *name, +- const void *value, size_t size, int flags); ++extern int cap_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name, const void *value, size_t size, ++ int flags); + extern int cap_inode_removexattr(struct dentry *dentry, const char *name); + extern int cap_inode_need_killpriv(struct dentry *dentry); + extern int cap_inode_killpriv(struct dentry *dentry); +@@ -438,11 +439,11 @@ static inline void security_free_mnt_opt + * inode. 
+ * @inode_setxattr: + * Check permission before setting the extended attributes +- * @value identified by @name for @dentry. ++ * @value identified by @name for @dentry and @mnt. + * Return 0 if permission is granted. + * @inode_post_setxattr: + * Update inode security field after successful setxattr operation. +- * @value identified by @name for @dentry. ++ * @value identified by @name for @dentry and @mnt. + * @inode_getxattr: + * Check permission before obtaining the extended attributes + * identified by @name for @dentry. +@@ -1392,10 +1393,13 @@ struct security_operations { + struct iattr *attr); + int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); + void (*inode_delete) (struct inode *inode); +- int (*inode_setxattr) (struct dentry *dentry, const char *name, +- const void *value, size_t size, int flags); +- void (*inode_post_setxattr) (struct dentry *dentry, const char *name, +- const void *value, size_t size, int flags); ++ int (*inode_setxattr) (struct dentry *dentry, struct vfsmount *mnt, ++ const char *name, const void *value, size_t size, ++ int flags); ++ void (*inode_post_setxattr) (struct dentry *dentry, ++ struct vfsmount *mnt, ++ const char *name, const void *value, ++ size_t size, int flags); + int (*inode_getxattr) (struct dentry *dentry, const char *name); + int (*inode_listxattr) (struct dentry *dentry); + int (*inode_removexattr) (struct dentry *dentry, const char *name); +@@ -1666,10 +1670,12 @@ int security_inode_setattr(struct dentry + struct iattr *attr); + int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); + void security_inode_delete(struct inode *inode); +-int security_inode_setxattr(struct dentry *dentry, const char *name, +- const void *value, size_t size, int flags); +-void security_inode_post_setxattr(struct dentry *dentry, const char *name, +- const void *value, size_t size, int flags); ++int security_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name, const void 
*value, ++ size_t size, int flags); ++void security_inode_post_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name, const void *value, ++ size_t size, int flags); + int security_inode_getxattr(struct dentry *dentry, const char *name); + int security_inode_listxattr(struct dentry *dentry); + int security_inode_removexattr(struct dentry *dentry, const char *name); +@@ -2092,13 +2098,18 @@ static inline void security_inode_delete + { } + + static inline int security_inode_setxattr(struct dentry *dentry, +- const char *name, const void *value, size_t size, int flags) ++ struct vfsmount *mnt, ++ const char *name, const void *value, ++ size_t size, int flags) + { +- return cap_inode_setxattr(dentry, name, value, size, flags); ++ return cap_inode_setxattr(dentry, mnt, name, value, size, flags); + } + + static inline void security_inode_post_setxattr(struct dentry *dentry, +- const char *name, const void *value, size_t size, int flags) ++ struct vfsmount *mnt, ++ const char *name, ++ const void *value, ++ size_t size, int flags) + { } + + static inline int security_inode_getxattr(struct dentry *dentry, +--- a/security/capability.c ++++ b/security/capability.c +@@ -235,7 +235,8 @@ static void cap_inode_delete(struct inod + { + } + +-static void cap_inode_post_setxattr(struct dentry *dentry, const char *name, ++static void cap_inode_post_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name, + const void *value, size_t size, int flags) + { + } +--- a/security/commoncap.c ++++ b/security/commoncap.c +@@ -414,8 +414,9 @@ int cap_bprm_secureexec (struct linux_bi + current->egid != current->gid); + } + +-int cap_inode_setxattr(struct dentry *dentry, const char *name, +- const void *value, size_t size, int flags) ++int cap_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name, const void *value, size_t size, ++ int flags) + { + if (!strcmp(name, XATTR_NAME_CAPS)) { + if (!capable(CAP_SETFCAP)) +--- 
a/security/security.c ++++ b/security/security.c +@@ -471,20 +471,24 @@ void security_inode_delete(struct inode + security_ops->inode_delete(inode); + } + +-int security_inode_setxattr(struct dentry *dentry, const char *name, +- const void *value, size_t size, int flags) ++int security_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name, const void *value, size_t size, ++ int flags) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_setxattr(dentry, name, value, size, flags); ++ return security_ops->inode_setxattr(dentry, mnt, name, value, size, ++ flags); + } + +-void security_inode_post_setxattr(struct dentry *dentry, const char *name, +- const void *value, size_t size, int flags) ++void security_inode_post_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name, const void *value, ++ size_t size, int flags) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return; +- security_ops->inode_post_setxattr(dentry, name, value, size, flags); ++ security_ops->inode_post_setxattr(dentry, mnt, name, value, size, ++ flags); + } + + int security_inode_getxattr(struct dentry *dentry, const char *name) +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2713,8 +2713,9 @@ static int selinux_inode_setotherxattr(s + return dentry_has_perm(current, NULL, dentry, FILE__SETATTR); + } + +-static int selinux_inode_setxattr(struct dentry *dentry, const char *name, +- const void *value, size_t size, int flags) ++static int selinux_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name, const void *value, ++ size_t size, int flags) + { + struct task_security_struct *tsec = current->security; + struct inode *inode = dentry->d_inode; +@@ -2768,7 +2769,8 @@ static int selinux_inode_setxattr(struct + &ad); + } + +-static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name, ++static void selinux_inode_post_setxattr(struct dentry *dentry, ++ 
struct vfsmount *mnt, const char *name, + const void *value, size_t size, + int flags) + { +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -595,6 +595,7 @@ static int smack_inode_getattr(struct vf + /** + * smack_inode_setxattr - Smack check for setting xattrs + * @dentry: the object ++ * @mnt: unused + * @name: name of the attribute + * @value: unused + * @size: unused +@@ -604,8 +605,9 @@ static int smack_inode_getattr(struct vf + * + * Returns 0 if access is permitted, an error code otherwise + */ +-static int smack_inode_setxattr(struct dentry *dentry, const char *name, +- const void *value, size_t size, int flags) ++static int smack_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char *name, const void *value, ++ size_t size, int flags) + { + int rc = 0; + +@@ -617,7 +619,7 @@ static int smack_inode_setxattr(struct d + if (size == 0) + rc = -EINVAL; + } else +- rc = cap_inode_setxattr(dentry, name, value, size, flags); ++ rc = cap_inode_setxattr(dentry, mnt, name, value, size, flags); + + if (rc == 0) + rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); +@@ -628,6 +630,7 @@ static int smack_inode_setxattr(struct d + /** + * smack_inode_post_setxattr - Apply the Smack update approved above + * @dentry: object ++ * @mnt: unused + * @name: attribute name + * @value: attribute value + * @size: attribute size +@@ -636,7 +639,8 @@ static int smack_inode_setxattr(struct d + * Set the pointer in the inode blob to the entry found + * in the master label list. 
+ */ +-static void smack_inode_post_setxattr(struct dentry *dentry, const char *name, ++static void smack_inode_post_setxattr(struct dentry *dentry, ++ struct vfsmount *mnt, const char *name, + const void *value, size_t size, int flags) + { + struct inode_smack *isp; diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-symlink.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-symlink.diff new file mode 100644 index 0000000000..e855eda546 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-symlink.diff @@ -0,0 +1,105 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_symlink LSM hook + +This is needed for computing pathnames in the AppArmor LSM. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/namei.c | 2 +- + include/linux/security.h | 8 +++++--- + security/capability.c | 2 +- + security/security.c | 4 ++-- + security/selinux/hooks.c | 3 ++- + 5 files changed, 11 insertions(+), 8 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2358,7 +2358,7 @@ int vfs_symlink(struct inode *dir, struc + if (!dir->i_op || !dir->i_op->symlink) + return -EPERM; + +- error = security_inode_symlink(dir, dentry, oldname); ++ error = security_inode_symlink(dir, dentry, mnt, oldname); + if (error) + return error; + +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -355,6 +355,7 @@ static inline void security_free_mnt_opt + * Check the permission to create a symbolic link to a file. + * @dir contains the inode structure of parent directory of the symbolic link. + * @dentry contains the dentry structure of the symbolic link. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * @old_name contains the pathname of file. + * Return 0 if permission is granted. 
+ * @inode_mkdir: +@@ -1363,8 +1364,8 @@ struct security_operations { + int (*inode_link) (struct dentry *old_dentry, + struct inode *dir, struct dentry *new_dentry); + int (*inode_unlink) (struct inode *dir, struct dentry *dentry); +- int (*inode_symlink) (struct inode *dir, +- struct dentry *dentry, const char *old_name); ++ int (*inode_symlink) (struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, const char *old_name); + int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, int mode); + int (*inode_rmdir) (struct inode *dir, struct dentry *dentry); +@@ -1634,7 +1635,7 @@ int security_inode_link(struct dentry *o + struct dentry *new_dentry); + int security_inode_unlink(struct inode *dir, struct dentry *dentry); + int security_inode_symlink(struct inode *dir, struct dentry *dentry, +- const char *old_name); ++ struct vfsmount *mnt, const char *old_name); + int security_inode_mkdir(struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, int mode); + int security_inode_rmdir(struct inode *dir, struct dentry *dentry); +@@ -1999,6 +2000,7 @@ static inline int security_inode_unlink( + + static inline int security_inode_symlink(struct inode *dir, + struct dentry *dentry, ++ struct vfsmount *mnt, + const char *old_name) + { + return 0; +--- a/security/capability.c ++++ b/security/capability.c +@@ -172,7 +172,7 @@ static int cap_inode_unlink(struct inode + } + + static int cap_inode_symlink(struct inode *inode, struct dentry *dentry, +- const char *name) ++ struct vfsmount *mnt, const char *name) + { + return 0; + } +--- a/security/security.c ++++ b/security/security.c +@@ -382,11 +382,11 @@ int security_inode_unlink(struct inode * + } + + int security_inode_symlink(struct inode *dir, struct dentry *dentry, +- const char *old_name) ++ struct vfsmount *mnt, const char *old_name) + { + if (unlikely(IS_PRIVATE(dir))) + return 0; +- return security_ops->inode_symlink(dir, dentry, old_name); ++ return 
security_ops->inode_symlink(dir, dentry, mnt, old_name); + } + + int security_inode_mkdir(struct inode *dir, struct dentry *dentry, +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2592,7 +2592,8 @@ static int selinux_inode_unlink(struct i + return may_link(dir, dentry, MAY_UNLINK); + } + +-static int selinux_inode_symlink(struct inode *dir, struct dentry *dentry, const char *name) ++static int selinux_inode_symlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt, const char *name) + { + return may_create(dir, dentry, SECCLASS_LNK_FILE); + } diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-unlink.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-unlink.diff new file mode 100644 index 0000000000..71764b4678 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-unlink.diff @@ -0,0 +1,132 @@ +From: Tony Jones +Subject: Pass struct vfsmount to the inode_unlink LSM hook + +This is needed for computing pathnames in the AppArmor LSM. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/namei.c | 2 +- + include/linux/security.h | 10 +++++++--- + security/capability.c | 3 ++- + security/security.c | 5 +++-- + security/selinux/hooks.c | 5 +++-- + security/smack/smack_lsm.c | 4 +++- + 6 files changed, 19 insertions(+), 10 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2264,7 +2264,7 @@ int vfs_unlink(struct inode *dir, struct + if (d_mountpoint(dentry)) + error = -EBUSY; + else { +- error = security_inode_unlink(dir, dentry); ++ error = security_inode_unlink(dir, dentry, mnt); + if (!error) + error = dir->i_op->unlink(dir, dentry); + } +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -352,6 +352,7 @@ static inline void security_free_mnt_opt + * Check the permission to remove a hard link to a file. + * @dir contains the inode structure of parent directory of the file. 
+ * @dentry contains the dentry structure for file to be unlinked. ++ * @mnt is the vfsmount corresponding to @dentry (may be NULL). + * Return 0 if permission is granted. + * @inode_symlink: + * Check the permission to create a symbolic link to a file. +@@ -1368,7 +1369,8 @@ struct security_operations { + int (*inode_link) (struct dentry *old_dentry, struct vfsmount *old_mnt, + struct inode *dir, struct dentry *new_dentry, + struct vfsmount *new_mnt); +- int (*inode_unlink) (struct inode *dir, struct dentry *dentry); ++ int (*inode_unlink) (struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt); + int (*inode_symlink) (struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, const char *old_name); + int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, +@@ -1640,7 +1642,8 @@ int security_inode_create(struct inode * + int security_inode_link(struct dentry *old_dentry, struct vfsmount *old_mnt, + struct inode *dir, struct dentry *new_dentry, + struct vfsmount *new_mnt); +-int security_inode_unlink(struct inode *dir, struct dentry *dentry); ++int security_inode_unlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt); + int security_inode_symlink(struct inode *dir, struct dentry *dentry, + struct vfsmount *mnt, const char *old_name); + int security_inode_mkdir(struct inode *dir, struct dentry *dentry, +@@ -2003,7 +2006,8 @@ static inline int security_inode_link(st + } + + static inline int security_inode_unlink(struct inode *dir, +- struct dentry *dentry) ++ struct dentry *dentry, ++ struct vfsmount *mnt) + { + return 0; + } +--- a/security/capability.c ++++ b/security/capability.c +@@ -167,7 +167,8 @@ static int cap_inode_link(struct dentry + return 0; + } + +-static int cap_inode_unlink(struct inode *inode, struct dentry *dentry) ++static int cap_inode_unlink(struct inode *inode, struct dentry *dentry, ++ struct vfsmount *mnt) + { + return 0; + } +--- a/security/security.c ++++ b/security/security.c +@@ -376,11 +376,12 
@@ int security_inode_link(struct dentry *o + new_dentry, new_mnt); + } + +-int security_inode_unlink(struct inode *dir, struct dentry *dentry) ++int security_inode_unlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_unlink(dir, dentry); ++ return security_ops->inode_unlink(dir, dentry, mnt); + } + + int security_inode_symlink(struct inode *dir, struct dentry *dentry, +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2587,11 +2587,12 @@ static int selinux_inode_link(struct den + return may_link(dir, old_dentry, MAY_LINK); + } + +-static int selinux_inode_unlink(struct inode *dir, struct dentry *dentry) ++static int selinux_inode_unlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) + { + int rc; + +- rc = secondary_ops->inode_unlink(dir, dentry); ++ rc = secondary_ops->inode_unlink(dir, dentry, mnt); + if (rc) + return rc; + return may_link(dir, dentry, MAY_UNLINK); +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -454,11 +454,13 @@ static int smack_inode_link(struct dentr + * smack_inode_unlink - Smack check on inode deletion + * @dir: containing directory object + * @dentry: file to unlink ++ * @mnt: vfsmount of file to unlink + * + * Returns 0 if current can write the containing directory + * and the object, error code otherwise + */ +-static int smack_inode_unlink(struct inode *dir, struct dentry *dentry) ++static int smack_inode_unlink(struct inode *dir, struct dentry *dentry, ++ struct vfsmount *mnt) + { + struct inode *ip = dentry->d_inode; + int rc; diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/security-xattr-file.diff b/src/patches/suse-2.6.27.25/patches.apparmor/security-xattr-file.diff new file mode 100644 index 0000000000..a4fb9c887d --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/security-xattr-file.diff @@ -0,0 +1,592 @@ +From: Andreas Gruenbacher 
+Subject: Pass struct file down the inode_*xattr security LSM hooks + +This allows LSMs to also distinguish between file descriptor and path +access for the xattr operations. (The other relevant operations are +covered by the setattr hook.) + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/xattr.c | 59 +++++++++++++++++++++++---------------------- + include/linux/security.h | 38 ++++++++++++++++------------ + include/linux/xattr.h | 9 +++--- + security/capability.c | 5 ++- + security/commoncap.c | 4 +-- + security/security.c | 17 ++++++------ + security/selinux/hooks.c | 10 ++++--- + security/smack/smack_lsm.c | 14 ++++++---- + 8 files changed, 87 insertions(+), 69 deletions(-) + +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -68,7 +68,7 @@ xattr_permission(struct inode *inode, co + + int + vfs_setxattr(struct dentry *dentry, struct vfsmount *mnt, const char *name, +- const void *value, size_t size, int flags) ++ const void *value, size_t size, int flags, struct file *file) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -78,7 +78,7 @@ vfs_setxattr(struct dentry *dentry, stru + return error; + + mutex_lock(&inode->i_mutex); +- error = security_inode_setxattr(dentry, mnt, name, value, size, flags); ++ error = security_inode_setxattr(dentry, mnt, name, value, size, flags, file); + if (error) + goto out; + error = -EOPNOTSUPP; +@@ -132,7 +132,7 @@ EXPORT_SYMBOL_GPL(xattr_getsecurity); + + ssize_t + vfs_getxattr(struct dentry *dentry, struct vfsmount *mnt, const char *name, +- void *value, size_t size) ++ void *value, size_t size, struct file *file) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -141,7 +141,7 @@ vfs_getxattr(struct dentry *dentry, stru + if (error) + return error; + +- error = security_inode_getxattr(dentry, mnt, name); ++ error = security_inode_getxattr(dentry, mnt, name, file); + if (error) + return error; + +@@ -169,12 +169,12 @@ EXPORT_SYMBOL_GPL(vfs_getxattr); + + ssize_t + vfs_listxattr(struct 
dentry *dentry, struct vfsmount *mnt, char *list, +- size_t size) ++ size_t size, struct file *file) + { + struct inode *inode = dentry->d_inode; + ssize_t error; + +- error = security_inode_listxattr(dentry, mnt); ++ error = security_inode_listxattr(dentry, mnt, file); + if (error) + return error; + error = -EOPNOTSUPP; +@@ -190,7 +190,8 @@ vfs_listxattr(struct dentry *dentry, str + EXPORT_SYMBOL_GPL(vfs_listxattr); + + int +-vfs_removexattr(struct dentry *dentry, struct vfsmount *mnt, const char *name) ++vfs_removexattr(struct dentry *dentry, struct vfsmount *mnt, const char *name, ++ struct file *file) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -202,7 +203,7 @@ vfs_removexattr(struct dentry *dentry, s + if (error) + return error; + +- error = security_inode_removexattr(dentry, mnt, name); ++ error = security_inode_removexattr(dentry, mnt, name, file); + if (error) + return error; + +@@ -222,7 +223,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); + */ + static long + setxattr(struct dentry *dentry, struct vfsmount *mnt, const char __user *name, +- const void __user *value, size_t size, int flags) ++ const void __user *value, size_t size, int flags, struct file *file) + { + int error; + void *kvalue = NULL; +@@ -249,7 +250,7 @@ setxattr(struct dentry *dentry, struct v + } + } + +- error = vfs_setxattr(dentry, mnt, kname, kvalue, size, flags); ++ error = vfs_setxattr(dentry, mnt, kname, kvalue, size, flags, file); + kfree(kvalue); + return error; + } +@@ -266,7 +267,7 @@ SYSCALL_DEFINE5(setxattr, const char __u + return error; + error = mnt_want_write(path.mnt); + if (!error) { +- error = setxattr(path.dentry, path.mnt, name, value, size, flags); ++ error = setxattr(path.dentry, path.mnt, name, value, size, flags, NULL); + mnt_drop_write(path.mnt); + } + path_put(&path); +@@ -285,7 +286,7 @@ SYSCALL_DEFINE5(lsetxattr, const char __ + return error; + error = mnt_want_write(path.mnt); + if (!error) { +- error = setxattr(path.dentry, path.mnt, name, value, 
size, flags); ++ error = setxattr(path.dentry, path.mnt, name, value, size, flags, NULL); + mnt_drop_write(path.mnt); + } + path_put(&path); +@@ -306,7 +307,8 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, cons + audit_inode(NULL, dentry); + error = mnt_want_write_file(f->f_path.mnt, f); + if (!error) { +- error = setxattr(dentry, f->f_vfsmnt, name, value, size, flags); ++ error = setxattr(dentry, f->f_vfsmnt, name, value, size, flags, ++ f); + mnt_drop_write(f->f_path.mnt); + } + fput(f); +@@ -318,7 +320,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, cons + */ + static ssize_t + getxattr(struct dentry *dentry, struct vfsmount *mnt, const char __user *name, +- void __user *value, size_t size) ++ void __user *value, size_t size, struct file *file) + { + ssize_t error; + void *kvalue = NULL; +@@ -338,7 +340,7 @@ getxattr(struct dentry *dentry, struct v + return -ENOMEM; + } + +- error = vfs_getxattr(dentry, mnt, kname, kvalue, size); ++ error = vfs_getxattr(dentry, mnt, kname, kvalue, size, file); + if (error > 0) { + if (size && copy_to_user(value, kvalue, error)) + error = -EFAULT; +@@ -360,7 +362,7 @@ SYSCALL_DEFINE4(getxattr, const char __u + error = user_path(pathname, &path); + if (error) + return error; +- error = getxattr(path.dentry, path.mnt, name, value, size); ++ error = getxattr(path.dentry, path.mnt, name, value, size, NULL); + path_put(&path); + return error; + } +@@ -374,7 +376,7 @@ SYSCALL_DEFINE4(lgetxattr, const char __ + error = user_lpath(pathname, &path); + if (error) + return error; +- error = getxattr(path.dentry, path.mnt, name, value, size); ++ error = getxattr(path.dentry, path.mnt, name, value, size, NULL); + path_put(&path); + return error; + } +@@ -389,7 +391,7 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, cons + if (!f) + return error; + audit_inode(NULL, f->f_path.dentry); +- error = getxattr(f->f_path.dentry, f->f_path.mnt, name, value, size); ++ error = getxattr(f->f_path.dentry, f->f_path.mnt, name, value, size, f); + fput(f); + return error; + } +@@ 
-399,7 +401,7 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, cons + */ + static ssize_t + listxattr(struct dentry *dentry, struct vfsmount *mnt, char __user *list, +- size_t size) ++ size_t size, struct file *file) + { + ssize_t error; + char *klist = NULL; +@@ -412,7 +414,7 @@ listxattr(struct dentry *dentry, struct + return -ENOMEM; + } + +- error = vfs_listxattr(dentry, mnt, klist, size); ++ error = vfs_listxattr(dentry, mnt, klist, size, file); + if (error > 0) { + if (size && copy_to_user(list, klist, error)) + error = -EFAULT; +@@ -434,7 +436,7 @@ SYSCALL_DEFINE3(listxattr, const char __ + error = user_path(pathname, &path); + if (error) + return error; +- error = listxattr(path.dentry, path.mnt, list, size); ++ error = listxattr(path.dentry, path.mnt, list, size, NULL); + path_put(&path); + return error; + } +@@ -448,7 +450,7 @@ SYSCALL_DEFINE3(llistxattr, const char _ + error = user_lpath(pathname, &path); + if (error) + return error; +- error = listxattr(path.dentry, path.mnt, list, size); ++ error = listxattr(path.dentry, path.mnt, list, size, NULL); + path_put(&path); + return error; + } +@@ -462,7 +464,7 @@ SYSCALL_DEFINE3(flistxattr, int, fd, cha + if (!f) + return error; + audit_inode(NULL, f->f_path.dentry); +- error = listxattr(f->f_path.dentry, f->f_path.mnt, list, size); ++ error = listxattr(f->f_path.dentry, f->f_path.mnt, list, size, f); + fput(f); + return error; + } +@@ -471,7 +473,8 @@ SYSCALL_DEFINE3(flistxattr, int, fd, cha + * Extended attribute REMOVE operations + */ + static long +-removexattr(struct dentry *dentry, struct vfsmount *mnt, const char __user *name) ++removexattr(struct dentry *dentry, struct vfsmount *mnt, ++ const char __user *name, struct file *file) + { + int error; + char kname[XATTR_NAME_MAX + 1]; +@@ -482,7 +485,7 @@ removexattr(struct dentry *dentry, struc + if (error < 0) + return error; + +- return vfs_removexattr(dentry, mnt, kname); ++ return vfs_removexattr(dentry, mnt, kname, file); + } + + SYSCALL_DEFINE2(removexattr, 
const char __user *, pathname, +@@ -496,7 +499,7 @@ SYSCALL_DEFINE2(removexattr, const char + return error; + error = mnt_want_write(path.mnt); + if (!error) { +- error = removexattr(path.dentry, path.mnt, name); ++ error = removexattr(path.dentry, path.mnt, name, NULL); + mnt_drop_write(path.mnt); + } + path_put(&path); +@@ -514,7 +517,7 @@ SYSCALL_DEFINE2(lremovexattr, const char + return error; + error = mnt_want_write(path.mnt); + if (!error) { +- error = removexattr(path.dentry, path.mnt, name); ++ error = removexattr(path.dentry, path.mnt, name, NULL); + mnt_drop_write(path.mnt); + } + path_put(&path); +@@ -534,7 +537,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, c + audit_inode(NULL, dentry); + error = mnt_want_write_file(f->f_path.mnt, f); + if (!error) { +- error = removexattr(dentry, f->f_path.mnt, name); ++ error = removexattr(dentry, f->f_path.mnt, name, f); + mnt_drop_write(f->f_path.mnt); + } + fput(f); +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -56,9 +56,9 @@ extern void cap_bprm_apply_creds(struct + extern int cap_bprm_secureexec(struct linux_binprm *bprm); + extern int cap_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, + const char *name, const void *value, size_t size, +- int flags); ++ int flags, struct file *file); + extern int cap_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, +- const char *name); ++ const char *name, struct file *file); + extern int cap_inode_need_killpriv(struct dentry *dentry); + extern int cap_inode_killpriv(struct dentry *dentry); + extern int cap_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); +@@ -1396,16 +1396,17 @@ struct security_operations { + void (*inode_delete) (struct inode *inode); + int (*inode_setxattr) (struct dentry *dentry, struct vfsmount *mnt, + const char *name, const void *value, size_t size, +- int flags); ++ int flags, struct file *file); + void (*inode_post_setxattr) (struct dentry *dentry, + struct vfsmount *mnt, + 
const char *name, const void *value, + size_t size, int flags); + int (*inode_getxattr) (struct dentry *dentry, struct vfsmount *mnt, +- const char *name); +- int (*inode_listxattr) (struct dentry *dentry, struct vfsmount *mnt); ++ const char *name, struct file *file); ++ int (*inode_listxattr) (struct dentry *dentry, struct vfsmount *mnt, ++ struct file *file); + int (*inode_removexattr) (struct dentry *dentry, struct vfsmount *mnt, +- const char *name); ++ const char *name, struct file *file); + int (*inode_need_killpriv) (struct dentry *dentry); + int (*inode_killpriv) (struct dentry *dentry); + int (*inode_getsecurity) (const struct inode *inode, const char *name, void **buffer, bool alloc); +@@ -1675,15 +1676,16 @@ int security_inode_getattr(struct vfsmou + void security_inode_delete(struct inode *inode); + int security_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, + const char *name, const void *value, +- size_t size, int flags); ++ size_t size, int flags, struct file *file); + void security_inode_post_setxattr(struct dentry *dentry, struct vfsmount *mnt, + const char *name, const void *value, + size_t size, int flags); + int security_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, +- const char *name); +-int security_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt); ++ const char *name, struct file *file); ++int security_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct file *file); + int security_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, +- const char *name); ++ const char *name, struct file *file); + int security_inode_need_killpriv(struct dentry *dentry); + int security_inode_killpriv(struct dentry *dentry); + int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc); +@@ -2105,9 +2107,10 @@ static inline void security_inode_delete + static inline int security_inode_setxattr(struct dentry *dentry, + struct vfsmount *mnt, + const 
char *name, const void *value, +- size_t size, int flags) ++ size_t size, int flags, ++ struct file *file) + { +- return cap_inode_setxattr(dentry, mnt, name, value, size, flags); ++ return cap_inode_setxattr(dentry, mnt, name, value, size, flags, file); + } + + static inline void security_inode_post_setxattr(struct dentry *dentry, +@@ -2119,22 +2122,25 @@ static inline void security_inode_post_s + + static inline int security_inode_getxattr(struct dentry *dentry, + struct vfsmount *mnt, +- const char *name) ++ const char *name, ++ struct file *file) + { + return 0; + } + + static inline int security_inode_listxattr(struct dentry *dentry, +- struct vfsmount *mnt) ++ struct vfsmount *mnt, ++ struct file *file) + { + return 0; + } + + static inline int security_inode_removexattr(struct dentry *dentry, + struct vfsmount *mnt, +- const char *name) ++ const char *name, ++ struct file *file) + { +- return cap_inode_removexattr(dentry, mnt, name); ++ return cap_inode_removexattr(dentry, mnt, name, file); + } + + static inline int security_inode_need_killpriv(struct dentry *dentry) +--- a/include/linux/xattr.h ++++ b/include/linux/xattr.h +@@ -17,6 +17,7 @@ + + #include + #include ++#include + + /* Namespaces */ + #define XATTR_OS2_PREFIX "os2." 
+@@ -48,10 +49,10 @@ struct xattr_handler { + }; + + ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); +-ssize_t vfs_getxattr(struct dentry *, struct vfsmount *, const char *, void *, size_t); +-ssize_t vfs_listxattr(struct dentry *d, struct vfsmount *, char *list, size_t size); +-int vfs_setxattr(struct dentry *, struct vfsmount *, const char *, const void *, size_t, int); +-int vfs_removexattr(struct dentry *, struct vfsmount *mnt, const char *); ++ssize_t vfs_getxattr(struct dentry *, struct vfsmount *, const char *, void *, size_t, struct file *file); ++ssize_t vfs_listxattr(struct dentry *d, struct vfsmount *, char *list, size_t size, struct file *file); ++int vfs_setxattr(struct dentry *, struct vfsmount *, const char *, const void *, size_t, int, struct file *file); ++int vfs_removexattr(struct dentry *, struct vfsmount *mnt, const char *, struct file *file); + + ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); + ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); +--- a/security/capability.c ++++ b/security/capability.c +@@ -242,12 +242,13 @@ static void cap_inode_post_setxattr(stru + } + + static int cap_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, +- const char *name) ++ const char *name, struct file *f) + { + return 0; + } + +-static int cap_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt) ++static int cap_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct file *f) + { + return 0; + } +--- a/security/commoncap.c ++++ b/security/commoncap.c +@@ -416,7 +416,7 @@ int cap_bprm_secureexec (struct linux_bi + + int cap_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, + const char *name, const void *value, size_t size, +- int flags) ++ int flags, struct file *file) + { + if (!strcmp(name, XATTR_NAME_CAPS)) { + if (!capable(CAP_SETFCAP)) +@@ -430,7 +430,7 @@ int cap_inode_setxattr(struct dentry *de + } + + 
int cap_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, +- const char *name) ++ const char *name, struct file *file) + { + if (!strcmp(name, XATTR_NAME_CAPS)) { + if (!capable(CAP_SETFCAP)) +--- a/security/security.c ++++ b/security/security.c +@@ -473,12 +473,12 @@ void security_inode_delete(struct inode + + int security_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, + const char *name, const void *value, size_t size, +- int flags) ++ int flags, struct file *file) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; + return security_ops->inode_setxattr(dentry, mnt, name, value, size, +- flags); ++ flags, file); + } + + void security_inode_post_setxattr(struct dentry *dentry, struct vfsmount *mnt, +@@ -492,26 +492,27 @@ void security_inode_post_setxattr(struct + } + + int security_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, +- const char *name) ++ const char *name, struct file *file) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_getxattr(dentry, mnt, name); ++ return security_ops->inode_getxattr(dentry, mnt, name, file); + } + +-int security_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt) ++int security_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct file *file) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_listxattr(dentry, mnt); ++ return security_ops->inode_listxattr(dentry, mnt, file); + } + + int security_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, +- const char *name) ++ const char *name, struct file *file) + { + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; +- return security_ops->inode_removexattr(dentry, mnt, name); ++ return security_ops->inode_removexattr(dentry, mnt, name, file); + } + + int security_inode_need_killpriv(struct dentry *dentry) +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2715,7 +2715,7 @@ static int 
selinux_inode_setotherxattr(s + + static int selinux_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, + const char *name, const void *value, +- size_t size, int flags) ++ size_t size, int flags, struct file *file) + { + struct task_security_struct *tsec = current->security; + struct inode *inode = dentry->d_inode; +@@ -2797,18 +2797,20 @@ static void selinux_inode_post_setxattr( + } + + static int selinux_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, +- const char *name) ++ const char *name, struct file *file) + { + return dentry_has_perm(current, NULL, dentry, FILE__GETATTR); + } + +-static int selinux_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt) ++static int selinux_inode_listxattr(struct dentry *dentry, struct vfsmount *mnt, ++ struct file *file) + { + return dentry_has_perm(current, NULL, dentry, FILE__GETATTR); + } + + static int selinux_inode_removexattr(struct dentry *dentry, +- struct vfsmount *mnt, const char *name) ++ struct vfsmount *mnt, const char *name, ++ struct file *file) + { + if (strcmp(name, XATTR_NAME_SELINUX)) + return selinux_inode_setotherxattr(dentry, name); +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -600,6 +600,7 @@ static int smack_inode_getattr(struct vf + * @value: unused + * @size: unused + * @flags: unused ++ * @file: unused + * + * This protects the Smack attribute explicitly. 
+ * +@@ -607,7 +608,7 @@ static int smack_inode_getattr(struct vf + */ + static int smack_inode_setxattr(struct dentry *dentry, struct vfsmount *mnt, + const char *name, const void *value, +- size_t size, int flags) ++ size_t size, int flags, struct file *file) + { + int rc = 0; + +@@ -619,7 +620,8 @@ static int smack_inode_setxattr(struct d + if (size == 0) + rc = -EINVAL; + } else +- rc = cap_inode_setxattr(dentry, mnt, name, value, size, flags); ++ rc = cap_inode_setxattr(dentry, mnt, name, value, size, flags, ++ file); + + if (rc == 0) + rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); +@@ -675,11 +677,12 @@ static void smack_inode_post_setxattr(st + * @dentry: the object + * @mnt: unused + * @name: unused ++ * @file: unused + * + * Returns 0 if access is permitted, an error code otherwise + */ + static int smack_inode_getxattr(struct dentry *dentry, struct vfsmount *mnt, +- const char *name) ++ const char *name, struct file *file) + { + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ); + } +@@ -689,13 +692,14 @@ static int smack_inode_getxattr(struct d + * @dentry: the object + * @mnt: unused + * @name: name of the attribute ++ * @file: unused + * + * Removing the Smack attribute requires CAP_MAC_ADMIN + * + * Returns 0 if access is permitted, an error code otherwise + */ + static int smack_inode_removexattr(struct dentry *dentry, struct vfsmount *mnt, +- const char *name) ++ const char *name, struct file *file) + { + int rc = 0; + +@@ -705,7 +709,7 @@ static int smack_inode_removexattr(struc + if (!capable(CAP_MAC_ADMIN)) + rc = -EPERM; + } else +- rc = cap_inode_removexattr(dentry, mnt, name); ++ rc = cap_inode_removexattr(dentry, mnt, name, file); + + if (rc == 0) + rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/sysctl-pathname.diff b/src/patches/suse-2.6.27.25/patches.apparmor/sysctl-pathname.diff new file mode 100644 index 0000000000..599c76f1a1 --- /dev/null +++ 
b/src/patches/suse-2.6.27.25/patches.apparmor/sysctl-pathname.diff @@ -0,0 +1,111 @@ +From: Andreas Gruenbacher +Subject: Factor out sysctl pathname code + +Convert the selinux sysctl pathname computation code into a standalone +function. + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen +Reviewed-by: James Morris + +--- + include/linux/sysctl.h | 2 ++ + kernel/sysctl.c | 27 +++++++++++++++++++++++++++ + security/selinux/hooks.c | 34 +++++----------------------------- + 3 files changed, 34 insertions(+), 29 deletions(-) + +--- a/include/linux/sysctl.h ++++ b/include/linux/sysctl.h +@@ -996,6 +996,8 @@ extern int proc_doulongvec_minmax(struct + extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, + struct file *, void __user *, size_t *, loff_t *); + ++extern char *sysctl_pathname(ctl_table *, char *, int); ++ + extern int do_sysctl (int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen); +--- a/kernel/sysctl.c ++++ b/kernel/sysctl.c +@@ -1554,6 +1554,33 @@ void register_sysctl_root(struct ctl_tab + spin_unlock(&sysctl_lock); + } + ++char *sysctl_pathname(struct ctl_table *table, char *buffer, int buflen) ++{ ++ if (buflen < 1) ++ return NULL; ++ buffer += --buflen; ++ *buffer = '\0'; ++ ++ while (table) { ++ int namelen = strlen(table->procname); ++ ++ if (buflen < namelen + 1) ++ return NULL; ++ buflen -= namelen + 1; ++ buffer -= namelen; ++ memcpy(buffer, table->procname, namelen); ++ *--buffer = '/'; ++ table = table->parent; ++ } ++ if (buflen < 4) ++ return NULL; ++ buffer -= 4; ++ memcpy(buffer, "/sys", 4); ++ ++ return buffer; ++} ++EXPORT_SYMBOL_GPL(sysctl_pathname); ++ + #ifdef CONFIG_SYSCTL_SYSCALL + /* Perform the actual read/write of a sysctl table entry. 
*/ + static int do_sysctl_strategy(struct ctl_table_root *root, +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -1811,40 +1811,16 @@ static int selinux_capable(struct task_s + + static int selinux_sysctl_get_sid(ctl_table *table, u16 tclass, u32 *sid) + { +- int buflen, rc; +- char *buffer, *path, *end; ++ char *buffer, *path; ++ int rc = -ENOMEM; + +- rc = -ENOMEM; + buffer = (char *)__get_free_page(GFP_KERNEL); + if (!buffer) + goto out; + +- buflen = PAGE_SIZE; +- end = buffer+buflen; +- *--end = '\0'; +- buflen--; +- path = end-1; +- *path = '/'; +- while (table) { +- const char *name = table->procname; +- size_t namelen = strlen(name); +- buflen -= namelen + 1; +- if (buflen < 0) +- goto out_free; +- end -= namelen; +- memcpy(end, name, namelen); +- *--end = '/'; +- path = end; +- table = table->parent; +- } +- buflen -= 4; +- if (buflen < 0) +- goto out_free; +- end -= 4; +- memcpy(end, "/sys", 4); +- path = end; +- rc = security_genfs_sid("proc", path, tclass, sid); +-out_free: ++ path = sysctl_pathname(table, buffer, PAGE_SIZE); ++ if (path) ++ rc = security_genfs_sid("proc", path, tclass, sid); + free_page((unsigned long)buffer); + out: + return rc; diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/unambiguous-__d_path.diff b/src/patches/suse-2.6.27.25/patches.apparmor/unambiguous-__d_path.diff new file mode 100644 index 0000000000..924b566a27 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/unambiguous-__d_path.diff @@ -0,0 +1,267 @@ +From: Andreas Gruenbacher +Subject: Fix __d_path() for lazy unmounts and make it unambiguous + +First, when __d_path() hits a lazily unmounted mount point, it tries to prepend +the name of the lazily unmounted dentry to the path name. It gets this wrong, +and also overwrites the slash that separates the name from the following +pathname component. 
This patch fixes that; if a process was in directory +/foo/bar and /foo got lazily unmounted, the old result was ``foobar'' (note the +missing slash), while the new result with this patch is ``foo/bar''. + +Second, it isn't always possible to tell from the __d_path() result whether the +specified root and rootmnt (i.e., the chroot) was reached. We need an +unambiguous result for AppArmor at least though, so we make sure that paths +will only start with a slash if the path leads all the way up to the root. + +We also add a @fail_deleted argument, which allows to get rid of some of the +mess in sys_getcwd(). + +This patch leaves getcwd() and d_path() as they were before for everything +except for bind-mounted directories; for them, it reports ``/foo/bar'' instead +of ``foobar'' in the example described above. + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen +Acked-by: Alan Cox + +[ Moved dcache_lock outside vfsmount_lock to fix lock order (bnc#490902) ] +Signed-off-by: Nick Piggin + +--- + fs/dcache.c | 126 +++++++++++++++++++++++++++---------------------- + fs/seq_file.c | 4 - + include/linux/dcache.h | 5 + + 3 files changed, 75 insertions(+), 60 deletions(-) + +Index: linux-2.6.27/fs/dcache.c +=================================================================== +--- linux-2.6.27.orig/fs/dcache.c ++++ linux-2.6.27/fs/dcache.c +@@ -1898,44 +1898,46 @@ static int prepend_name(char **buffer, i + * @root: root vfsmnt/dentry (may be modified by this function) + * @buffer: buffer to return value in + * @buflen: buffer length ++ * @flags: flags controling behavior of d_path + * +- * Convert a dentry into an ASCII path name. If the entry has been deleted +- * the string " (deleted)" is appended. Note that this is ambiguous. +- * +- * Returns the buffer or an error code if the path was too long. +- * +- * "buflen" should be positive. Caller holds the dcache_lock. ++ * Convert a dentry into an ASCII path name. 
If the entry has been deleted, ++ * then if @flags has D_PATH_FAIL_DELETED set, ERR_PTR(-ENOENT) is returned. ++ * Otherwise, the string " (deleted)" is appended. Note that this is ambiguous. + * + * If path is not reachable from the supplied root, then the value of +- * root is changed (without modifying refcounts). ++ * root is changed (without modifying refcounts). The path returned in this ++ * case will be relative (i.e., it will not start with a slash). ++ * ++ * Returns the buffer or an error code if the path was too long. + */ + char *__d_path(const struct path *path, struct path *root, +- char *buffer, int buflen) ++ char *buffer, int buflen, int flags) + { + struct dentry *dentry = path->dentry; + struct vfsmount *vfsmnt = path->mnt; +- char *end = buffer + buflen; +- char *retval; ++ const unsigned char *name; ++ int namelen; ++ ++ buffer += buflen; ++ prepend(&buffer, &buflen, "\0", 1); + ++ spin_lock(&dcache_lock); + spin_lock(&vfsmount_lock); +- prepend(&end, &buflen, "\0", 1); +- if (!IS_ROOT(dentry) && d_unhashed(dentry) && +- (prepend(&end, &buflen, " (deleted)", 10) != 0)) ++ if (!IS_ROOT(dentry) && d_unhashed(dentry)) { ++ if (flags & D_PATH_FAIL_DELETED) { ++ buffer = ERR_PTR(-ENOENT); ++ goto out; ++ } ++ if (prepend(&buffer, &buflen, " (deleted)", 10) != 0) + goto Elong; +- ++ } + if (buflen < 1) + goto Elong; +- /* Get '/' right */ +- retval = end-1; +- *retval = '/'; + +- for (;;) { ++ while (dentry != root->dentry || vfsmnt != root->mnt) { + struct dentry * parent; + +- if (dentry == root->dentry && vfsmnt == root->mnt) +- break; + if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { +- /* Global root? 
*/ + if (vfsmnt->mnt_parent == vfsmnt) { + goto global_root; + } +@@ -1945,27 +1947,51 @@ char *__d_path(const struct path *path, + } + parent = dentry->d_parent; + prefetch(parent); +- if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || +- (prepend(&end, &buflen, "/", 1) != 0)) ++ if ((prepend_name(&buffer, &buflen, &dentry->d_name) != 0) || ++ (prepend(&buffer, &buflen, "/", 1) != 0)) + goto Elong; +- retval = end; + dentry = parent; + } ++ /* Get '/' right. */ ++ if (*buffer != '/' && prepend(&buffer, &buflen, "/", 1)) ++ goto Elong; + + out: + spin_unlock(&vfsmount_lock); +- return retval; ++ spin_unlock(&dcache_lock); ++ return buffer; + + global_root: +- retval += 1; /* hit the slash */ +- if (prepend_name(&retval, &buflen, &dentry->d_name) != 0) ++ /* ++ * We went past the (vfsmount, dentry) we were looking for and have ++ * either hit a root dentry, a lazily unmounted dentry, an ++ * unconnected dentry, or the file is on a pseudo filesystem. ++ */ ++ namelen = dentry->d_name.len; ++ name = dentry->d_name.name; ++ ++ /* ++ * If this is a root dentry, then overwrite the slash. This ++ * will also DTRT with pseudo filesystems which have root ++ * dentries named "foo:". 
++ */ ++ if (IS_ROOT(dentry) && *buffer == '/') { ++ buffer++; ++ buflen++; ++ } ++ if ((flags & D_PATH_DISCONNECT) && *name == '/') { ++ /* Make sure we won't return a pathname starting with '/' */ ++ name++; ++ namelen--; ++ } ++ if (prepend(&buffer, &buflen, name, namelen)) + goto Elong; + root->mnt = vfsmnt; + root->dentry = dentry; + goto out; + + Elong: +- retval = ERR_PTR(-ENAMETOOLONG); ++ buffer = ERR_PTR(-ENAMETOOLONG); + goto out; + } + +@@ -2002,10 +2028,8 @@ char *d_path(const struct path *path, ch + root = current->fs->root; + path_get(&root); + read_unlock(¤t->fs->lock); +- spin_lock(&dcache_lock); + tmp = root; +- res = __d_path(path, &tmp, buf, buflen); +- spin_unlock(&dcache_lock); ++ res = __d_path(path, &tmp, buf, buflen, 0); + path_put(&root); + return res; + } +@@ -2088,9 +2112,9 @@ Elong: + */ + SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) + { +- int error; +- struct path pwd, root; +- char *page = (char *) __get_free_page(GFP_USER); ++ int error, len; ++ struct path pwd, root, tmp; ++ char *page = (char *) __get_free_page(GFP_USER), *cwd; + + if (!page) + return -ENOMEM; +@@ -2102,30 +2126,20 @@ SYSCALL_DEFINE2(getcwd, char __user *, b + path_get(&root); + read_unlock(¤t->fs->lock); + +- error = -ENOENT; +- /* Has the current directory has been unlinked? 
*/ +- spin_lock(&dcache_lock); +- if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) { +- unsigned long len; +- struct path tmp = root; +- char * cwd; +- +- cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE); +- spin_unlock(&dcache_lock); +- ++ tmp = root; ++ cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE, D_PATH_FAIL_DELETED); ++ if (IS_ERR(cwd)) { + error = PTR_ERR(cwd); +- if (IS_ERR(cwd)) +- goto out; ++ goto out; ++ } + +- error = -ERANGE; +- len = PAGE_SIZE + page - cwd; +- if (len <= size) { +- error = len; +- if (copy_to_user(buf, cwd, len)) +- error = -EFAULT; +- } +- } else +- spin_unlock(&dcache_lock); ++ error = -ERANGE; ++ len = PAGE_SIZE + page - cwd; ++ if (len <= size) { ++ error = len; ++ if (copy_to_user(buf, cwd, len)) ++ error = -EFAULT; ++ } + + out: + path_put(&pwd); +Index: linux-2.6.27/fs/seq_file.c +=================================================================== +--- linux-2.6.27.orig/fs/seq_file.c ++++ linux-2.6.27/fs/seq_file.c +@@ -441,9 +441,7 @@ int seq_path_root(struct seq_file *m, st + char *s = m->buf + m->count; + char *p; + +- spin_lock(&dcache_lock); +- p = __d_path(path, root, s, m->size - m->count); +- spin_unlock(&dcache_lock); ++ p = __d_path(path, root, s, m->size - m->count, 0); + err = PTR_ERR(p); + if (!IS_ERR(p)) { + s = mangle_path(s, p, esc); +Index: linux-2.6.27/include/linux/dcache.h +=================================================================== +--- linux-2.6.27.orig/include/linux/dcache.h ++++ linux-2.6.27/include/linux/dcache.h +@@ -299,9 +299,12 @@ extern int d_validate(struct dentry *, s + /* + * helper function for dentry_operations.d_dname() members + */ ++#define D_PATH_FAIL_DELETED 1 ++#define D_PATH_DISCONNECT 2 + extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); + +-extern char *__d_path(const struct path *path, struct path *root, char *, int); ++extern char *__d_path(const struct path *path, struct path *root, char *, int, ++ int); + extern char *d_path(const struct path *, char 
*, int); + extern char *dentry_path(struct dentry *, char *, int); + diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/vfs-getxattr.diff b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-getxattr.diff new file mode 100644 index 0000000000..8219e1dd58 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-getxattr.diff @@ -0,0 +1,190 @@ +From: Tony Jones +Subject: Add a struct vfsmount parameter to vfs_getxattr() + +The vfsmount will be passed down to the LSM hook so that LSMs can compute +pathnames. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/nfsd/nfs4xdr.c | 2 +- + fs/nfsd/vfs.c | 21 ++++++++++++--------- + fs/xattr.c | 15 ++++++++------- + include/linux/nfsd/nfsd.h | 3 ++- + include/linux/xattr.h | 2 +- + 5 files changed, 24 insertions(+), 19 deletions(-) + +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -1446,7 +1446,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s + } + if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT + | FATTR4_WORD0_SUPPORTED_ATTRS)) { +- err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl); ++ err = nfsd4_get_nfs4_acl(rqstp, dentry, exp->ex_path.mnt, &acl); + aclsupport = (err == 0); + if (bmval0 & FATTR4_WORD0_ACL) { + if (err == -EOPNOTSUPP) +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -420,11 +420,12 @@ out_nfserr: + #if defined(CONFIG_NFSD_V2_ACL) || \ + defined(CONFIG_NFSD_V3_ACL) || \ + defined(CONFIG_NFSD_V4) +-static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) ++static ssize_t nfsd_getxattr(struct dentry *dentry, struct vfsmount *mnt, ++ char *key, void **buf) + { + ssize_t buflen; + +- buflen = vfs_getxattr(dentry, key, NULL, 0); ++ buflen = vfs_getxattr(dentry, mnt, key, NULL, 0); + if (buflen <= 0) + return buflen; + +@@ -432,7 +433,7 @@ static ssize_t nfsd_getxattr(struct dent + if (!*buf) + return -ENOMEM; + +- return vfs_getxattr(dentry, key, *buf, buflen); ++ return vfs_getxattr(dentry, mnt, key, *buf, buflen); + } + 
#endif + +@@ -513,13 +514,13 @@ out_nfserr: + } + + static struct posix_acl * +-_get_posix_acl(struct dentry *dentry, char *key) ++_get_posix_acl(struct dentry *dentry, struct vfsmount *mnt, char *key) + { + void *buf = NULL; + struct posix_acl *pacl = NULL; + int buflen; + +- buflen = nfsd_getxattr(dentry, key, &buf); ++ buflen = nfsd_getxattr(dentry, mnt, key, &buf); + if (!buflen) + buflen = -ENODATA; + if (buflen <= 0) +@@ -531,14 +532,15 @@ _get_posix_acl(struct dentry *dentry, ch + } + + int +-nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) ++nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, ++ struct vfsmount *mnt, struct nfs4_acl **acl) + { + struct inode *inode = dentry->d_inode; + int error = 0; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; + +- pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS); ++ pacl = _get_posix_acl(dentry, mnt, POSIX_ACL_XATTR_ACCESS); + if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) + pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(pacl)) { +@@ -548,7 +550,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqst + } + + if (S_ISDIR(inode->i_mode)) { +- dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT); ++ dpacl = _get_posix_acl(dentry, mnt, POSIX_ACL_XATTR_DEFAULT); + if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) + dpacl = NULL; + else if (IS_ERR(dpacl)) { +@@ -2080,7 +2082,8 @@ nfsd_get_posix_acl(struct svc_fh *fhp, i + return ERR_PTR(-EOPNOTSUPP); + } + +- size = nfsd_getxattr(fhp->fh_dentry, name, &value); ++ size = nfsd_getxattr(fhp->fh_dentry, fhp->fh_export->ex_path.mnt, name, ++ &value); + if (size < 0) + return ERR_PTR(size); + +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -131,7 +131,8 @@ out_noalloc: + EXPORT_SYMBOL_GPL(xattr_getsecurity); + + ssize_t +-vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) ++vfs_getxattr(struct dentry *dentry, struct vfsmount *mnt, const char *name, ++ void *value, 
size_t size) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -314,8 +315,8 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, cons + * Extended attribute GET operations + */ + static ssize_t +-getxattr(struct dentry *d, const char __user *name, void __user *value, +- size_t size) ++getxattr(struct dentry *dentry, struct vfsmount *mnt, const char __user *name, ++ void __user *value, size_t size) + { + ssize_t error; + void *kvalue = NULL; +@@ -335,7 +336,7 @@ getxattr(struct dentry *d, const char __ + return -ENOMEM; + } + +- error = vfs_getxattr(d, kname, kvalue, size); ++ error = vfs_getxattr(dentry, mnt, kname, kvalue, size); + if (error > 0) { + if (size && copy_to_user(value, kvalue, error)) + error = -EFAULT; +@@ -357,7 +358,7 @@ SYSCALL_DEFINE4(getxattr, const char __u + error = user_path(pathname, &path); + if (error) + return error; +- error = getxattr(path.dentry, name, value, size); ++ error = getxattr(path.dentry, path.mnt, name, value, size); + path_put(&path); + return error; + } +@@ -371,7 +372,7 @@ SYSCALL_DEFINE4(lgetxattr, const char __ + error = user_lpath(pathname, &path); + if (error) + return error; +- error = getxattr(path.dentry, name, value, size); ++ error = getxattr(path.dentry, path.mnt, name, value, size); + path_put(&path); + return error; + } +@@ -386,7 +387,7 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, cons + if (!f) + return error; + audit_inode(NULL, f->f_path.dentry); +- error = getxattr(f->f_path.dentry, name, value, size); ++ error = getxattr(f->f_path.dentry, f->f_path.mnt, name, value, size); + fput(f); + return error; + } +--- a/include/linux/nfsd/nfsd.h ++++ b/include/linux/nfsd/nfsd.h +@@ -85,7 +85,8 @@ __be32 nfsd_setattr(struct svc_rqst *, + #ifdef CONFIG_NFSD_V4 + __be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, + struct nfs4_acl *); +-int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); ++int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, ++ struct vfsmount *mnt, struct 
nfs4_acl **); + #endif /* CONFIG_NFSD_V4 */ + __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, + char *name, int len, struct iattr *attrs, +--- a/include/linux/xattr.h ++++ b/include/linux/xattr.h +@@ -48,7 +48,7 @@ struct xattr_handler { + }; + + ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); +-ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); ++ssize_t vfs_getxattr(struct dentry *, struct vfsmount *, const char *, void *, size_t); + ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); + int vfs_setxattr(struct dentry *, struct vfsmount *, const char *, const void *, size_t, int); + int vfs_removexattr(struct dentry *, const char *); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/vfs-link.diff b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-link.diff new file mode 100644 index 0000000000..2c0a53a272 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-link.diff @@ -0,0 +1,91 @@ +From: Tony Jones +Subject: Add struct vfsmount parameters to vfs_link() + +The vfsmount will be passed down to the LSM hook so that LSMs can compute +pathnames. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/ecryptfs/inode.c | 9 +++++++-- + fs/namei.c | 6 ++++-- + fs/nfsd/vfs.c | 3 ++- + include/linux/fs.h | 2 +- + 4 files changed, 14 insertions(+), 6 deletions(-) + +--- a/fs/ecryptfs/inode.c ++++ b/fs/ecryptfs/inode.c +@@ -403,19 +403,24 @@ static int ecryptfs_link(struct dentry * + struct dentry *new_dentry) + { + struct dentry *lower_old_dentry; ++ struct vfsmount *lower_old_mnt; + struct dentry *lower_new_dentry; ++ struct vfsmount *lower_new_mnt; + struct dentry *lower_dir_dentry; + u64 file_size_save; + int rc; + + file_size_save = i_size_read(old_dentry->d_inode); + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); ++ lower_old_mnt = ecryptfs_dentry_to_lower_mnt(old_dentry); + lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); ++ lower_new_mnt = ecryptfs_dentry_to_lower_mnt(new_dentry); + dget(lower_old_dentry); + dget(lower_new_dentry); + lower_dir_dentry = lock_parent(lower_new_dentry); +- rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, +- lower_new_dentry); ++ rc = vfs_link(lower_old_dentry, lower_old_mnt, ++ lower_dir_dentry->d_inode, lower_new_dentry, ++ lower_new_mnt); + if (rc || !lower_new_dentry->d_inode) + goto out_lock; + rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2412,7 +2412,7 @@ SYSCALL_DEFINE2(symlink, const char __us + return sys_symlinkat(oldname, AT_FDCWD, newname); + } + +-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) ++int vfs_link(struct dentry *old_dentry, struct vfsmount *old_mnt, struct inode *dir, struct dentry *new_dentry, struct vfsmount *new_mnt) + { + struct inode *inode = old_dentry->d_inode; + int error; +@@ -2490,7 +2490,9 @@ SYSCALL_DEFINE5(linkat, int, olddfd, con + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_dput; +- error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, 
new_dentry); ++ error = vfs_link(old_path.dentry, old_path.mnt, ++ nd.path.dentry->d_inode, ++ new_dentry, nd.path.mnt); + mnt_drop_write(nd.path.mnt); + out_dput: + dput(new_dentry); +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -1650,7 +1650,8 @@ nfsd_link(struct svc_rqst *rqstp, struct + err = nfserrno(host_err); + goto out_dput; + } +- host_err = vfs_link(dold, dirp, dnew); ++ host_err = vfs_link(dold, tfhp->fh_export->ex_path.mnt, dirp, ++ dnew, ffhp->fh_export->ex_path.mnt); + if (!host_err) { + if (EX_ISSYNC(ffhp->fh_export)) { + err = nfserrno(nfsd_sync_dir(ddir)); +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1181,7 +1181,7 @@ extern int vfs_create(struct inode *, st + extern int vfs_mkdir(struct inode *, struct dentry *, struct vfsmount *, int); + extern int vfs_mknod(struct inode *, struct dentry *, struct vfsmount *, int, dev_t); + extern int vfs_symlink(struct inode *, struct dentry *, struct vfsmount *, const char *); +-extern int vfs_link(struct dentry *, struct inode *, struct dentry *); ++extern int vfs_link(struct dentry *, struct vfsmount *, struct inode *, struct dentry *, struct vfsmount *); + extern int vfs_rmdir(struct inode *, struct dentry *); + extern int vfs_unlink(struct inode *, struct dentry *); + extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/vfs-listxattr.diff b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-listxattr.diff new file mode 100644 index 0000000000..2c6f24ee68 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-listxattr.diff @@ -0,0 +1,101 @@ +From: Tony Jones +Subject: Add a struct vfsmount parameter to vfs_listxattr() + +The vfsmount will be passed down to the LSM hook so that LSMs can compute +pathnames. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/xattr.c | 25 ++++++++++++++----------- + include/linux/xattr.h | 2 +- + 2 files changed, 15 insertions(+), 12 deletions(-) + +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -168,18 +168,20 @@ nolsm: + EXPORT_SYMBOL_GPL(vfs_getxattr); + + ssize_t +-vfs_listxattr(struct dentry *d, char *list, size_t size) ++vfs_listxattr(struct dentry *dentry, struct vfsmount *mnt, char *list, ++ size_t size) + { ++ struct inode *inode = dentry->d_inode; + ssize_t error; + +- error = security_inode_listxattr(d); ++ error = security_inode_listxattr(dentry); + if (error) + return error; + error = -EOPNOTSUPP; +- if (d->d_inode->i_op && d->d_inode->i_op->listxattr) { +- error = d->d_inode->i_op->listxattr(d, list, size); +- } else { +- error = security_inode_listsecurity(d->d_inode, list, size); ++ if (inode->i_op && inode->i_op->listxattr) ++ error = inode->i_op->listxattr(dentry, list, size); ++ else { ++ error = security_inode_listsecurity(inode, list, size); + if (size && error > size) + error = -ERANGE; + } +@@ -396,7 +398,8 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, cons + * Extended attribute LIST operations + */ + static ssize_t +-listxattr(struct dentry *d, char __user *list, size_t size) ++listxattr(struct dentry *dentry, struct vfsmount *mnt, char __user *list, ++ size_t size) + { + ssize_t error; + char *klist = NULL; +@@ -409,7 +412,7 @@ listxattr(struct dentry *d, char __user + return -ENOMEM; + } + +- error = vfs_listxattr(d, klist, size); ++ error = vfs_listxattr(dentry, mnt, klist, size); + if (error > 0) { + if (size && copy_to_user(list, klist, error)) + error = -EFAULT; +@@ -431,7 +434,7 @@ SYSCALL_DEFINE3(listxattr, const char __ + error = user_path(pathname, &path); + if (error) + return error; +- error = listxattr(path.dentry, list, size); ++ error = listxattr(path.dentry, path.mnt, list, size); + path_put(&path); + return error; + } +@@ -445,7 +448,7 @@ 
SYSCALL_DEFINE3(llistxattr, const char _ + error = user_lpath(pathname, &path); + if (error) + return error; +- error = listxattr(path.dentry, list, size); ++ error = listxattr(path.dentry, path.mnt, list, size); + path_put(&path); + return error; + } +@@ -459,7 +462,7 @@ SYSCALL_DEFINE3(flistxattr, int, fd, cha + if (!f) + return error; + audit_inode(NULL, f->f_path.dentry); +- error = listxattr(f->f_path.dentry, list, size); ++ error = listxattr(f->f_path.dentry, f->f_path.mnt, list, size); + fput(f); + return error; + } +--- a/include/linux/xattr.h ++++ b/include/linux/xattr.h +@@ -49,7 +49,7 @@ struct xattr_handler { + + ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); + ssize_t vfs_getxattr(struct dentry *, struct vfsmount *, const char *, void *, size_t); +-ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); ++ssize_t vfs_listxattr(struct dentry *d, struct vfsmount *, char *list, size_t size); + int vfs_setxattr(struct dentry *, struct vfsmount *, const char *, const void *, size_t, int); + int vfs_removexattr(struct dentry *, const char *); + diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/vfs-mkdir.diff b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-mkdir.diff new file mode 100644 index 0000000000..9e45fa80f4 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-mkdir.diff @@ -0,0 +1,137 @@ +From: Tony Jones +Subject: Add struct vfsmount parameter to vfs_mkdir() + +The vfsmount will be passed down to the LSM hook so that LSMs can compute +pathnames. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/ecryptfs/inode.c | 5 ++++- + fs/namei.c | 5 +++-- + fs/nfsd/nfs4recover.c | 3 ++- + fs/nfsd/vfs.c | 8 +++++--- + include/linux/fs.h | 2 +- + kernel/cgroup.c | 2 +- + 6 files changed, 16 insertions(+), 9 deletions(-) + +--- a/fs/ecryptfs/inode.c ++++ b/fs/ecryptfs/inode.c +@@ -501,11 +501,14 @@ static int ecryptfs_mkdir(struct inode * + { + int rc; + struct dentry *lower_dentry; ++ struct vfsmount *lower_mnt; + struct dentry *lower_dir_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); ++ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + lower_dir_dentry = lock_parent(lower_dentry); +- rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode); ++ rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, lower_mnt, ++ mode); + if (rc || !lower_dentry->d_inode) + goto out; + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2077,7 +2077,8 @@ SYSCALL_DEFINE3(mknod, const char __user + return sys_mknodat(AT_FDCWD, filename, mode, dev); + } + +-int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) ++int vfs_mkdir(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt, ++ int mode) + { + int error = may_create(dir, dentry, 1); + +@@ -2120,7 +2121,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_dput; +- error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); ++ error = vfs_mkdir(nd.path.dentry->d_inode, dentry, nd.path.mnt, mode); + mnt_drop_write(nd.path.mnt); + out_dput: + dput(dentry); +--- a/fs/nfsd/nfs4recover.c ++++ b/fs/nfsd/nfs4recover.c +@@ -158,7 +158,8 @@ nfsd4_create_clid_dir(struct nfs4_client + status = mnt_want_write(rec_dir.path.mnt); + if (status) + goto out_put; +- status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); ++ status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, ++ 
rec_dir.path.mnt, S_IRWXU); + mnt_drop_write(rec_dir.path.mnt); + out_put: + dput(dentry); +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -1215,6 +1215,7 @@ nfsd_create(struct svc_rqst *rqstp, stru + int type, dev_t rdev, struct svc_fh *resfhp) + { + struct dentry *dentry, *dchild = NULL; ++ struct svc_export *exp; + struct inode *dirp; + __be32 err; + __be32 err2; +@@ -1232,6 +1233,7 @@ nfsd_create(struct svc_rqst *rqstp, stru + goto out; + + dentry = fhp->fh_dentry; ++ exp = fhp->fh_export; + dirp = dentry->d_inode; + + err = nfserr_notdir; +@@ -1248,7 +1250,7 @@ nfsd_create(struct svc_rqst *rqstp, stru + host_err = PTR_ERR(dchild); + if (IS_ERR(dchild)) + goto out_nfserr; +- err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); ++ err = fh_compose(resfhp, exp, dchild, fhp); + if (err) + goto out; + } else { +@@ -1298,7 +1300,7 @@ nfsd_create(struct svc_rqst *rqstp, stru + host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + break; + case S_IFDIR: +- host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); ++ host_err = vfs_mkdir(dirp, dchild, exp->ex_path.mnt, iap->ia_mode); + break; + case S_IFCHR: + case S_IFBLK: +@@ -1312,7 +1314,7 @@ nfsd_create(struct svc_rqst *rqstp, stru + goto out_nfserr; + } + +- if (EX_ISSYNC(fhp->fh_export)) { ++ if (EX_ISSYNC(exp)) { + err = nfserrno(nfsd_sync_dir(dentry)); + write_inode_now(dchild->d_inode, 1); + } +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1178,7 +1178,7 @@ extern void unlock_super(struct super_bl + */ + extern int vfs_permission(struct nameidata *, int); + extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); +-extern int vfs_mkdir(struct inode *, struct dentry *, int); ++extern int vfs_mkdir(struct inode *, struct dentry *, struct vfsmount *, int); + extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); + extern int vfs_symlink(struct inode *, struct dentry *, const char *); + extern int vfs_link(struct dentry *, struct inode *, struct dentry *); +--- 
a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -2911,7 +2911,7 @@ int cgroup_clone(struct task_struct *tsk + } + + /* Create the cgroup directory, which also creates the cgroup */ +- ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755); ++ ret = vfs_mkdir(inode, dentry, NULL, S_IFDIR | 0755); + child = __d_cgrp(dentry); + dput(dentry); + if (ret) { diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/vfs-mknod.diff b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-mknod.diff new file mode 100644 index 0000000000..cc12392b8d --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-mknod.diff @@ -0,0 +1,99 @@ +From: Tony Jones +Subject: Add a struct vfsmount parameter to vfs_mknod() + +The vfsmount will be passed down to the LSM hook so that LSMs can compute +pathnames. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/ecryptfs/inode.c | 5 ++++- + fs/namei.c | 10 ++++++---- + fs/nfsd/vfs.c | 3 ++- + include/linux/fs.h | 2 +- + net/unix/af_unix.c | 3 ++- + 5 files changed, 15 insertions(+), 8 deletions(-) + +--- a/fs/ecryptfs/inode.c ++++ b/fs/ecryptfs/inode.c +@@ -552,11 +552,14 @@ ecryptfs_mknod(struct inode *dir, struct + { + int rc; + struct dentry *lower_dentry; ++ struct vfsmount *lower_mnt; + struct dentry *lower_dir_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); ++ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + lower_dir_dentry = lock_parent(lower_dentry); +- rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev); ++ rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, lower_mnt, mode, ++ dev); + if (rc || !lower_dentry->d_inode) + goto out; + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -1976,7 +1976,8 @@ fail: + } + EXPORT_SYMBOL_GPL(lookup_create); + +-int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) ++int vfs_mknod(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt, 
++ int mode, dev_t dev) + { + int error = may_create(dir, dentry, 0); + +@@ -2054,11 +2055,12 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const + error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); + break; + case S_IFCHR: case S_IFBLK: +- error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode, +- new_decode_dev(dev)); ++ error = vfs_mknod(nd.path.dentry->d_inode, dentry, ++ nd.path.mnt, mode, new_decode_dev(dev)); + break; + case S_IFIFO: case S_IFSOCK: +- error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); ++ error = vfs_mknod(nd.path.dentry->d_inode, dentry, ++ nd.path.mnt, mode, 0); + break; + } + mnt_drop_write(nd.path.mnt); +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -1306,7 +1306,8 @@ nfsd_create(struct svc_rqst *rqstp, stru + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: +- host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); ++ host_err = vfs_mknod(dirp, dchild, exp->ex_path.mnt, ++ iap->ia_mode, rdev); + break; + } + if (host_err < 0) { +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1179,7 +1179,7 @@ extern void unlock_super(struct super_bl + extern int vfs_permission(struct nameidata *, int); + extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); + extern int vfs_mkdir(struct inode *, struct dentry *, struct vfsmount *, int); +-extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); ++extern int vfs_mknod(struct inode *, struct dentry *, struct vfsmount *, int, dev_t); + extern int vfs_symlink(struct inode *, struct dentry *, const char *); + extern int vfs_link(struct dentry *, struct inode *, struct dentry *); + extern int vfs_rmdir(struct inode *, struct dentry *); +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -827,7 +827,8 @@ static int unix_bind(struct socket *sock + err = mnt_want_write(nd.path.mnt); + if (err) + goto out_mknod_dput; +- err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0); ++ err = vfs_mknod(nd.path.dentry->d_inode, dentry, nd.path.mnt, ++ mode, 0); + 
mnt_drop_write(nd.path.mnt); + if (err) + goto out_mknod_dput; diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/vfs-notify_change.diff b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-notify_change.diff new file mode 100644 index 0000000000..478d569eb2 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-notify_change.diff @@ -0,0 +1,291 @@ +From: Tony Jones +Subject: Add a vfsmount parameter to notify_change() + +The vfsmount parameter must be set appropriately for files visible +outside the kernel. Files that are only used in a filesystem (e.g., +reiserfs xattr files) will have a NULL vfsmount. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/attr.c | 3 ++- + fs/ecryptfs/inode.c | 4 +++- + fs/exec.c | 3 ++- + fs/hpfs/namei.c | 2 +- + fs/namei.c | 2 +- + fs/nfsd/vfs.c | 8 ++++---- + fs/open.c | 28 +++++++++++++++------------- + fs/utimes.c | 2 +- + include/linux/fs.h | 6 +++--- + mm/filemap.c | 2 +- + 10 files changed, 33 insertions(+), 27 deletions(-) + +--- a/fs/attr.c ++++ b/fs/attr.c +@@ -100,7 +100,8 @@ int inode_setattr(struct inode * inode, + } + EXPORT_SYMBOL(inode_setattr); + +-int notify_change(struct dentry * dentry, struct iattr * attr) ++int notify_change(struct dentry *dentry, struct vfsmount *mnt, ++ struct iattr *attr) + { + struct inode *inode = dentry->d_inode; + mode_t mode = inode->i_mode; +--- a/fs/ecryptfs/inode.c ++++ b/fs/ecryptfs/inode.c +@@ -850,6 +850,7 @@ static int ecryptfs_setattr(struct dentr + { + int rc = 0; + struct dentry *lower_dentry; ++ struct vfsmount *lower_mnt; + struct inode *inode; + struct inode *lower_inode; + struct ecryptfs_crypt_stat *crypt_stat; +@@ -860,6 +861,7 @@ static int ecryptfs_setattr(struct dentr + inode = dentry->d_inode; + lower_inode = ecryptfs_inode_to_lower(inode); + lower_dentry = ecryptfs_dentry_to_lower(dentry); ++ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + mutex_lock(&crypt_stat->cs_mutex); + if 
(S_ISDIR(dentry->d_inode->i_mode)) + crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); +@@ -911,7 +913,7 @@ static int ecryptfs_setattr(struct dentr + ia->ia_valid &= ~ATTR_MODE; + + mutex_lock(&lower_dentry->d_inode->i_mutex); +- rc = notify_change(lower_dentry, ia); ++ rc = notify_change(lower_dentry, lower_mnt, ia); + mutex_unlock(&lower_dentry->d_inode->i_mutex); + out: + fsstack_copy_attr_all(inode, lower_inode, NULL); +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1841,7 +1841,8 @@ int do_coredump(long signr, int exit_cod + goto close_fail; + if (!file->f_op->write) + goto close_fail; +- if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0) ++ if (!ispipe && ++ do_truncate(file->f_path.dentry, file->f_path.mnt, 0, 0, file) != 0) + goto close_fail; + + retval = binfmt->core_dump(signr, regs, file, core_limit); +--- a/fs/hpfs/namei.c ++++ b/fs/hpfs/namei.c +@@ -426,7 +426,7 @@ again: + /*printk("HPFS: truncating file before delete.\n");*/ + newattrs.ia_size = 0; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; +- err = notify_change(dentry, &newattrs); ++ err = notify_change(dentry, NULL, &newattrs); + put_write_access(inode); + if (!err) + goto again; +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -1619,7 +1619,7 @@ int may_open(struct nameidata *nd, int a + if (!error) { + DQUOT_INIT(inode); + +- error = do_truncate(dentry, 0, ++ error = do_truncate(dentry, nd->path.mnt, 0, + ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, + NULL); + } +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -397,7 +397,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str + err = nfserr_notsync; + if (!check_guard || guardtime == inode->i_ctime.tv_sec) { + fh_lock(fhp); +- host_err = notify_change(dentry, iap); ++ host_err = notify_change(dentry, fhp->fh_export->ex_path.mnt, iap); + /* to get NFSERR_JUKEBOX on the wire, need -ETIMEDOUT */ + if (host_err == -EAGAIN) + host_err = -ETIMEDOUT; +@@ -964,13 +964,13 @@ out: + return err; + } + +-static void kill_suid(struct dentry *dentry) ++static void kill_suid(struct dentry 
*dentry, struct vfsmount *mnt) + { + struct iattr ia; + ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; + + mutex_lock(&dentry->d_inode->i_mutex); +- notify_change(dentry, &ia); ++ notify_change(dentry, mnt, &ia); + mutex_unlock(&dentry->d_inode->i_mutex); + } + +@@ -1033,7 +1033,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, s + + /* clear setuid/setgid flag after write */ + if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) +- kill_suid(dentry); ++ kill_suid(dentry, exp->ex_path.mnt); + + if (host_err >= 0 && stable) { + static ino_t last_ino; +--- a/fs/open.c ++++ b/fs/open.c +@@ -195,8 +195,8 @@ out: + return error; + } + +-int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, +- struct file *filp) ++int do_truncate(struct dentry *dentry, struct vfsmount *mnt, loff_t length, ++ unsigned int time_attrs, struct file *filp) + { + int err; + struct iattr newattrs; +@@ -216,7 +216,7 @@ int do_truncate(struct dentry *dentry, l + newattrs.ia_valid |= should_remove_suid(dentry); + + mutex_lock(&dentry->d_inode->i_mutex); +- err = notify_change(dentry, &newattrs); ++ err = notify_change(dentry, mnt, &newattrs); + mutex_unlock(&dentry->d_inode->i_mutex); + return err; + } +@@ -272,7 +272,7 @@ static long do_sys_truncate(const char _ + error = locks_verify_truncate(inode, NULL, length); + if (!error) { + DQUOT_INIT(inode); +- error = do_truncate(path.dentry, length, 0, NULL); ++ error = do_truncate(path.dentry, path.mnt, length, 0, NULL); + } + + put_write_and_out: +@@ -327,7 +327,8 @@ static long do_sys_ftruncate(unsigned in + + error = locks_verify_truncate(inode, file, length); + if (!error) +- error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); ++ error = do_truncate(dentry, file->f_path.mnt, length, ++ ATTR_MTIME|ATTR_CTIME, file); + out_putf: + fput(file); + out: +@@ -624,7 +625,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & 
~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- err = notify_change(dentry, &newattrs); ++ err = notify_change(dentry, file->f_path.mnt, &newattrs); + mutex_unlock(&inode->i_mutex); + mnt_drop_write(file->f_path.mnt); + out_putf: +@@ -653,7 +654,7 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- error = notify_change(path.dentry, &newattrs); ++ error = notify_change(path.dentry, path.mnt, &newattrs); + mutex_unlock(&inode->i_mutex); + mnt_drop_write(path.mnt); + dput_and_out: +@@ -667,7 +668,8 @@ SYSCALL_DEFINE2(chmod, const char __user + return sys_fchmodat(AT_FDCWD, filename, mode); + } + +-static int chown_common(struct dentry * dentry, uid_t user, gid_t group) ++static int chown_common(struct dentry * dentry, struct vfsmount *mnt, ++ uid_t user, gid_t group) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -686,7 +688,7 @@ static int chown_common(struct dentry * + newattrs.ia_valid |= + ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; + mutex_lock(&inode->i_mutex); +- error = notify_change(dentry, &newattrs); ++ error = notify_change(dentry, mnt, &newattrs); + mutex_unlock(&inode->i_mutex); + + return error; +@@ -703,7 +705,7 @@ SYSCALL_DEFINE3(chown, const char __user + error = mnt_want_write(path.mnt); + if (error) + goto out_release; +- error = chown_common(path.dentry, user, group); ++ error = chown_common(path.dentry, path.mnt, user, group); + mnt_drop_write(path.mnt); + out_release: + path_put(&path); +@@ -728,7 +730,7 @@ SYSCALL_DEFINE5(fchownat, int, dfd, cons + error = mnt_want_write(path.mnt); + if (error) + goto out_release; +- error = chown_common(path.dentry, user, group); ++ error = chown_common(path.dentry, path.mnt, user, group); + mnt_drop_write(path.mnt); + out_release: + path_put(&path); +@@ -747,7 +749,7 @@ SYSCALL_DEFINE3(lchown, const char __use + error = 
mnt_want_write(path.mnt); + if (error) + goto out_release; +- error = chown_common(path.dentry, user, group); ++ error = chown_common(path.dentry, path.mnt, user, group); + mnt_drop_write(path.mnt); + out_release: + path_put(&path); +@@ -770,7 +772,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd + goto out_fput; + dentry = file->f_path.dentry; + audit_inode(NULL, dentry); +- error = chown_common(dentry, user, group); ++ error = chown_common(dentry, file->f_path.mnt, user, group); + mnt_drop_write(file->f_path.mnt); + out_fput: + fput(file); +--- a/fs/utimes.c ++++ b/fs/utimes.c +@@ -102,7 +102,7 @@ static int utimes_common(struct path *pa + } + } + mutex_lock(&inode->i_mutex); +- error = notify_change(path->dentry, &newattrs); ++ error = notify_change(path->dentry, path->mnt, &newattrs); + mutex_unlock(&inode->i_mutex); + + mnt_drop_write_and_out: +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1636,8 +1636,8 @@ static inline int break_lease(struct ino + + /* fs/open.c */ + +-extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, +- struct file *filp); ++extern int do_truncate(struct dentry *, struct vfsmount *, loff_t start, ++ unsigned int time_attrs, struct file *filp); + extern long do_sys_open(int dfd, const char __user *filename, int flags, + int mode); + extern struct file *filp_open(const char *, int, int); +@@ -1798,7 +1798,7 @@ extern int do_remount_sb(struct super_bl + #ifdef CONFIG_BLOCK + extern sector_t bmap(struct inode *, sector_t); + #endif +-extern int notify_change(struct dentry *, struct iattr *); ++extern int notify_change(struct dentry *, struct vfsmount *, struct iattr *); + extern int inode_permission(struct inode *, int); + extern int generic_permission(struct inode *, int, + int (*check_acl)(struct inode *, int)); +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -1831,7 +1831,7 @@ static int __remove_suid(struct path *pa + struct iattr newattrs; + + newattrs.ia_valid = ATTR_FORCE | kill; +- return 
notify_change(path->dentry, &newattrs); ++ return notify_change(path->dentry, path->mnt, &newattrs); + } + + int file_remove_suid(struct file *file) diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/vfs-removexattr.diff b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-removexattr.diff new file mode 100644 index 0000000000..a3153f4067 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-removexattr.diff @@ -0,0 +1,121 @@ +From: Tony Jones +Subject: Add a struct vfsmount parameter to vfs_removexattr() + +The vfsmount will be passed down to the LSM hook so that LSMs can compute +pathnames. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/nfsd/vfs.c | 11 ++++++----- + fs/xattr.c | 12 ++++++------ + include/linux/xattr.h | 2 +- + 3 files changed, 13 insertions(+), 12 deletions(-) + +--- linux-2.6.27.orig/fs/nfsd/vfs.c ++++ linux-2.6.27/fs/nfsd/vfs.c +@@ -2095,6 +2095,7 @@ nfsd_get_posix_acl(struct svc_fh *fhp, i + int + nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) + { ++ struct vfsmount *mnt; + struct inode *inode = fhp->fh_dentry->d_inode; + char *name; + void *value = NULL; +@@ -2127,22 +2128,22 @@ nfsd_set_posix_acl(struct svc_fh *fhp, i + } else + size = 0; + +- error = mnt_want_write(fhp->fh_export->ex_path.mnt); ++ mnt = fhp->fh_export->ex_path.mnt; ++ error = mnt_want_write(mnt); + if (error) + goto getout; + if (size) +- error = vfs_setxattr(fhp->fh_dentry, fhp->fh_export->ex_path.mnt, +- name, value, size,0); ++ error = vfs_setxattr(fhp->fh_dentry, mnt, name, value, size,0); + else { + if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) + error = 0; + else { +- error = vfs_removexattr(fhp->fh_dentry, name); ++ error = vfs_removexattr(fhp->fh_dentry, mnt, name); + if (error == -ENODATA) + error = 0; + } + } +- mnt_drop_write(fhp->fh_export->ex_path.mnt); ++ mnt_drop_write(mnt); + + getout: + kfree(value); +--- linux-2.6.27.orig/fs/xattr.c ++++ 
linux-2.6.27/fs/xattr.c +@@ -190,7 +190,7 @@ vfs_listxattr(struct dentry *dentry, str + EXPORT_SYMBOL_GPL(vfs_listxattr); + + int +-vfs_removexattr(struct dentry *dentry, const char *name) ++vfs_removexattr(struct dentry *dentry, struct vfsmount *mnt, const char *name) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -471,7 +471,7 @@ SYSCALL_DEFINE3(flistxattr, int, fd, cha + * Extended attribute REMOVE operations + */ + static long +-removexattr(struct dentry *d, const char __user *name) ++removexattr(struct dentry *dentry, struct vfsmount *mnt, const char __user *name) + { + int error; + char kname[XATTR_NAME_MAX + 1]; +@@ -482,7 +482,7 @@ removexattr(struct dentry *d, const char + if (error < 0) + return error; + +- return vfs_removexattr(d, kname); ++ return vfs_removexattr(dentry, mnt, kname); + } + + SYSCALL_DEFINE2(removexattr, const char __user *, pathname, +@@ -496,7 +496,7 @@ SYSCALL_DEFINE2(removexattr, const char + return error; + error = mnt_want_write(path.mnt); + if (!error) { +- error = removexattr(path.dentry, name); ++ error = removexattr(path.dentry, path.mnt, name); + mnt_drop_write(path.mnt); + } + path_put(&path); +@@ -514,7 +514,7 @@ SYSCALL_DEFINE2(lremovexattr, const char + return error; + error = mnt_want_write(path.mnt); + if (!error) { +- error = removexattr(path.dentry, name); ++ error = removexattr(path.dentry, path.mnt, name); + mnt_drop_write(path.mnt); + } + path_put(&path); +@@ -534,7 +534,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, c + audit_inode(NULL, dentry); + error = mnt_want_write_file(f->f_path.mnt, f); + if (!error) { +- error = removexattr(dentry, name); ++ error = removexattr(dentry, f->f_path.mnt, name); + mnt_drop_write(f->f_path.mnt); + } + fput(f); +--- linux-2.6.27.orig/include/linux/xattr.h ++++ linux-2.6.27/include/linux/xattr.h +@@ -51,7 +51,7 @@ ssize_t xattr_getsecurity(struct inode * + ssize_t vfs_getxattr(struct dentry *, struct vfsmount *, const char *, void *, size_t); + ssize_t 
vfs_listxattr(struct dentry *d, struct vfsmount *, char *list, size_t size); + int vfs_setxattr(struct dentry *, struct vfsmount *, const char *, const void *, size_t, int); +-int vfs_removexattr(struct dentry *, const char *); ++int vfs_removexattr(struct dentry *, struct vfsmount *mnt, const char *); + + ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); + ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/vfs-rename.diff b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-rename.diff new file mode 100644 index 0000000000..2dd07dab72 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-rename.diff @@ -0,0 +1,125 @@ +From: Tony Jones +Subject: Add struct vfsmount parameters to vfs_rename() + +The vfsmount will be passed down to the LSM hook so that LSMs can compute +pathnames. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/ecryptfs/inode.c | 7 ++++++- + fs/namei.c | 19 ++++++++++++------- + fs/nfsd/vfs.c | 3 ++- + include/linux/fs.h | 2 +- + 4 files changed, 21 insertions(+), 10 deletions(-) + +--- a/fs/ecryptfs/inode.c ++++ b/fs/ecryptfs/inode.c +@@ -590,19 +590,24 @@ ecryptfs_rename(struct inode *old_dir, s + { + int rc; + struct dentry *lower_old_dentry; ++ struct vfsmount *lower_old_mnt; + struct dentry *lower_new_dentry; ++ struct vfsmount *lower_new_mnt; + struct dentry *lower_old_dir_dentry; + struct dentry *lower_new_dir_dentry; + + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); ++ lower_old_mnt = ecryptfs_dentry_to_lower_mnt(old_dentry); + lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); ++ lower_new_mnt = ecryptfs_dentry_to_lower_mnt(new_dentry); + dget(lower_old_dentry); + dget(lower_new_dentry); + lower_old_dir_dentry = dget_parent(lower_old_dentry); + lower_new_dir_dentry = dget_parent(lower_new_dentry); + 
lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, +- lower_new_dir_dentry->d_inode, lower_new_dentry); ++ lower_old_mnt, lower_new_dir_dentry->d_inode, ++ lower_new_dentry, lower_new_mnt); + if (rc) + goto out_lock; + fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL); +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2547,7 +2547,8 @@ SYSCALL_DEFINE2(link, const char __user + * locking]. + */ + static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct vfsmount *old_mnt, struct inode *new_dir, ++ struct dentry *new_dentry, struct vfsmount *new_mnt) + { + int error = 0; + struct inode *target; +@@ -2590,7 +2591,8 @@ static int vfs_rename_dir(struct inode * + } + + static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct vfsmount *old_mnt, struct inode *new_dir, ++ struct dentry *new_dentry, struct vfsmount *new_mnt) + { + struct inode *target; + int error; +@@ -2618,7 +2620,8 @@ static int vfs_rename_other(struct inode + } + + int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct vfsmount *old_mnt, struct inode *new_dir, ++ struct dentry *new_dentry, struct vfsmount *new_mnt) + { + int error; + int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); +@@ -2647,9 +2650,11 @@ int vfs_rename(struct inode *old_dir, st + old_name = fsnotify_oldname_init(old_dentry->d_name.name); + + if (is_dir) +- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); ++ error = vfs_rename_dir(old_dir, old_dentry, old_mnt, ++ new_dir, new_dentry, new_mnt); + else +- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); ++ error = vfs_rename_other(old_dir, old_dentry, old_mnt, ++ new_dir, new_dentry, new_mnt); + if (!error) { + const char *new_name = 
old_dentry->d_name.name; + fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, +@@ -2726,8 +2731,8 @@ SYSCALL_DEFINE4(renameat, int, olddfd, c + error = mnt_want_write(oldnd.path.mnt); + if (error) + goto exit5; +- error = vfs_rename(old_dir->d_inode, old_dentry, +- new_dir->d_inode, new_dentry); ++ error = vfs_rename(old_dir->d_inode, old_dentry, oldnd.path.mnt, ++ new_dir->d_inode, new_dentry, newnd.path.mnt); + mnt_drop_write(oldnd.path.mnt); + exit5: + dput(new_dentry); +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -1752,7 +1752,8 @@ nfsd_rename(struct svc_rqst *rqstp, stru + if (host_err) + goto out_dput_new; + +- host_err = vfs_rename(fdir, odentry, tdir, ndentry); ++ host_err = vfs_rename(fdir, odentry, ffhp->fh_export->ex_path.mnt, ++ tdir, ndentry, tfhp->fh_export->ex_path.mnt); + if (!host_err && EX_ISSYNC(tfhp->fh_export)) { + host_err = nfsd_sync_dir(tdentry); + if (!host_err) +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1184,7 +1184,7 @@ extern int vfs_symlink(struct inode *, s + extern int vfs_link(struct dentry *, struct vfsmount *, struct inode *, struct dentry *, struct vfsmount *); + extern int vfs_rmdir(struct inode *, struct dentry *, struct vfsmount *); + extern int vfs_unlink(struct inode *, struct dentry *, struct vfsmount *); +-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); ++extern int vfs_rename(struct inode *, struct dentry *, struct vfsmount *, struct inode *, struct dentry *, struct vfsmount *); + + /* + * VFS dentry helper functions. diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/vfs-rmdir.diff b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-rmdir.diff new file mode 100644 index 0000000000..d3bd5bcae4 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-rmdir.diff @@ -0,0 +1,123 @@ +From: Tony Jones +Subject: Add a struct vfsmount parameter to vfs_rmdir() + +The vfsmount will be passed down to the LSM hook so that LSMs can compute +pathnames. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/ecryptfs/inode.c | 4 +++- + fs/namei.c | 4 ++-- + fs/nfsd/nfs4recover.c | 2 +- + fs/nfsd/vfs.c | 8 +++++--- + include/linux/fs.h | 2 +- + 5 files changed, 12 insertions(+), 8 deletions(-) + +--- a/fs/ecryptfs/inode.c ++++ b/fs/ecryptfs/inode.c +@@ -534,14 +534,16 @@ out: + static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) + { + struct dentry *lower_dentry; ++ struct vfsmount *lower_mnt; + struct dentry *lower_dir_dentry; + int rc; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); ++ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + dget(dentry); + lower_dir_dentry = lock_parent(lower_dentry); + dget(lower_dentry); +- rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); ++ rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry, lower_mnt); + dput(lower_dentry); + if (!rc) + d_delete(lower_dentry); +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2167,7 +2167,7 @@ void dentry_unhash(struct dentry *dentry + spin_unlock(&dcache_lock); + } + +-int vfs_rmdir(struct inode *dir, struct dentry *dentry) ++int vfs_rmdir(struct inode *dir, struct dentry *dentry,struct vfsmount *mnt) + { + int error = may_delete(dir, dentry, 1); + +@@ -2230,7 +2230,7 @@ static long do_rmdir(int dfd, const char + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit3; +- error = vfs_rmdir(nd.path.dentry->d_inode, dentry); ++ error = vfs_rmdir(nd.path.dentry->d_inode, dentry, nd.path.mnt); + mnt_drop_write(nd.path.mnt); + exit3: + dput(dentry); +--- a/fs/nfsd/nfs4recover.c ++++ b/fs/nfsd/nfs4recover.c +@@ -279,7 +279,7 @@ nfsd4_clear_clid_dir(struct dentry *dir, + * a kernel from the future.... 
*/ + nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); +- status = vfs_rmdir(dir->d_inode, dentry); ++ status = vfs_rmdir(dir->d_inode, dentry, rec_dir.path.mnt); + mutex_unlock(&dir->d_inode->i_mutex); + return status; + } +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -1790,6 +1790,7 @@ nfsd_unlink(struct svc_rqst *rqstp, stru + char *fname, int flen) + { + struct dentry *dentry, *rdentry; ++ struct svc_export *exp; + struct inode *dirp; + __be32 err; + int host_err; +@@ -1804,6 +1805,7 @@ nfsd_unlink(struct svc_rqst *rqstp, stru + fh_lock_nested(fhp, I_MUTEX_PARENT); + dentry = fhp->fh_dentry; + dirp = dentry->d_inode; ++ exp = fhp->fh_export; + + rdentry = lookup_one_len(fname, dentry, flen); + host_err = PTR_ERR(rdentry); +@@ -1825,21 +1827,21 @@ nfsd_unlink(struct svc_rqst *rqstp, stru + + if (type != S_IFDIR) { /* It's UNLINK */ + #ifdef MSNFS +- if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && ++ if ((exp->ex_flags & NFSEXP_MSNFS) && + (atomic_read(&rdentry->d_count) > 1)) { + host_err = -EPERM; + } else + #endif + host_err = vfs_unlink(dirp, rdentry); + } else { /* It's RMDIR */ +- host_err = vfs_rmdir(dirp, rdentry); ++ host_err = vfs_rmdir(dirp, rdentry, exp->ex_path.mnt); + } + + dput(rdentry); + + if (host_err) + goto out_drop; +- if (EX_ISSYNC(fhp->fh_export)) ++ if (EX_ISSYNC(exp)) + host_err = nfsd_sync_dir(dentry); + + out_drop: +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1182,7 +1182,7 @@ extern int vfs_mkdir(struct inode *, str + extern int vfs_mknod(struct inode *, struct dentry *, struct vfsmount *, int, dev_t); + extern int vfs_symlink(struct inode *, struct dentry *, struct vfsmount *, const char *); + extern int vfs_link(struct dentry *, struct vfsmount *, struct inode *, struct dentry *, struct vfsmount *); +-extern int vfs_rmdir(struct inode *, struct dentry *); ++extern int vfs_rmdir(struct inode *, struct dentry *, struct vfsmount *); + extern int 
vfs_unlink(struct inode *, struct dentry *); + extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); + diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/vfs-setxattr.diff b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-setxattr.diff new file mode 100644 index 0000000000..f95a346409 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-setxattr.diff @@ -0,0 +1,159 @@ +From: Tony Jones +Subject: Add a struct vfsmount parameter to vfs_setxattr() + +The vfsmount will be passed down to the LSM hook so that LSMs can compute +pathnames. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/nfsd/vfs.c | 16 +++++++++++----- + fs/xattr.c | 16 ++++++++-------- + include/linux/xattr.h | 3 ++- + 3 files changed, 21 insertions(+), 14 deletions(-) + +--- linux-2.6.27.orig/fs/nfsd/vfs.c ++++ linux-2.6.27/fs/nfsd/vfs.c +@@ -438,7 +438,8 @@ static ssize_t nfsd_getxattr(struct dent + + #if defined(CONFIG_NFSD_V4) + static int +-set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) ++set_nfsv4_acl_one(struct dentry *dentry, struct vfsmount *mnt, ++ struct posix_acl *pacl, char *key) + { + int len; + size_t buflen; +@@ -457,7 +458,7 @@ set_nfsv4_acl_one(struct dentry *dentry, + goto out; + } + +- error = vfs_setxattr(dentry, key, buf, len, 0); ++ error = vfs_setxattr(dentry, mnt, key, buf, len, 0); + out: + kfree(buf); + return error; +@@ -470,6 +471,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqst + __be32 error; + int host_error; + struct dentry *dentry; ++ struct vfsmount *mnt; + struct inode *inode; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; +@@ -480,6 +482,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqst + return error; + + dentry = fhp->fh_dentry; ++ mnt = fhp->fh_export->ex_path.mnt; + inode = dentry->d_inode; + if (S_ISDIR(inode->i_mode)) + flags = NFS4_ACL_DIR; +@@ -490,12 +493,14 @@ nfsd4_set_nfs4_acl(struct svc_rqst 
*rqst + } else if (host_error < 0) + goto out_nfserr; + +- host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); ++ host_error = set_nfsv4_acl_one(dentry, mnt, pacl, ++ POSIX_ACL_XATTR_ACCESS); + if (host_error < 0) + goto out_release; + + if (S_ISDIR(inode->i_mode)) +- host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); ++ host_error = set_nfsv4_acl_one(dentry, mnt, dpacl, ++ POSIX_ACL_XATTR_DEFAULT); + + out_release: + posix_acl_release(pacl); +@@ -2123,7 +2128,8 @@ nfsd_set_posix_acl(struct svc_fh *fhp, i + if (error) + goto getout; + if (size) +- error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); ++ error = vfs_setxattr(fhp->fh_dentry, fhp->fh_export->ex_path.mnt, ++ name, value, size,0); + else { + if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) + error = 0; +--- linux-2.6.27.orig/fs/xattr.c ++++ linux-2.6.27/fs/xattr.c +@@ -67,8 +67,8 @@ xattr_permission(struct inode *inode, co + } + + int +-vfs_setxattr(struct dentry *dentry, const char *name, const void *value, +- size_t size, int flags) ++vfs_setxattr(struct dentry *dentry, struct vfsmount *mnt, const char *name, ++ const void *value, size_t size, int flags) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -218,8 +218,8 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); + * Extended attribute SET operations + */ + static long +-setxattr(struct dentry *d, const char __user *name, const void __user *value, +- size_t size, int flags) ++setxattr(struct dentry *dentry, struct vfsmount *mnt, const char __user *name, ++ const void __user *value, size_t size, int flags) + { + int error; + void *kvalue = NULL; +@@ -246,7 +246,7 @@ setxattr(struct dentry *d, const char __ + } + } + +- error = vfs_setxattr(d, kname, kvalue, size, flags); ++ error = vfs_setxattr(dentry, mnt, kname, kvalue, size, flags); + kfree(kvalue); + return error; + } +@@ -263,7 +263,7 @@ SYSCALL_DEFINE5(setxattr, const char __u + return error; + error = mnt_want_write(path.mnt); + if 
(!error) { +- error = setxattr(path.dentry, name, value, size, flags); ++ error = setxattr(path.dentry, path.mnt, name, value, size, flags); + mnt_drop_write(path.mnt); + } + path_put(&path); +@@ -282,7 +282,7 @@ SYSCALL_DEFINE5(lsetxattr, const char __ + return error; + error = mnt_want_write(path.mnt); + if (!error) { +- error = setxattr(path.dentry, name, value, size, flags); ++ error = setxattr(path.dentry, path.mnt, name, value, size, flags); + mnt_drop_write(path.mnt); + } + path_put(&path); +@@ -303,7 +303,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, cons + audit_inode(NULL, dentry); + error = mnt_want_write_file(f->f_path.mnt, f); + if (!error) { +- error = setxattr(dentry, name, value, size, flags); ++ error = setxattr(dentry, f->f_vfsmnt, name, value, size, flags); + mnt_drop_write(f->f_path.mnt); + } + fput(f); +--- linux-2.6.27.orig/include/linux/xattr.h ++++ linux-2.6.27/include/linux/xattr.h +@@ -16,6 +16,7 @@ + #ifdef __KERNEL__ + + #include ++#include + + /* Namespaces */ + #define XATTR_OS2_PREFIX "os2." 
+@@ -49,7 +50,7 @@ struct xattr_handler { + ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); + ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); + ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); +-int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); ++int vfs_setxattr(struct dentry *, struct vfsmount *, const char *, const void *, size_t, int); + int vfs_removexattr(struct dentry *, const char *); + + ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); diff --git a/src/patches/suse-2.6.27.25/patches.apparmor/vfs-symlink.diff b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-symlink.diff new file mode 100644 index 0000000000..5751f91180 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-symlink.diff @@ -0,0 +1,123 @@ +From: Tony Jones +Subject: Add a struct vfsmount parameter to vfs_symlink() + +The vfsmount will be passed down to the LSM hook so that LSMs can compute +pathnames. 
+ +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/ecryptfs/inode.c | 4 +++- + fs/namei.c | 5 +++-- + fs/nfsd/vfs.c | 12 ++++++++---- + include/linux/fs.h | 2 +- + 4 files changed, 15 insertions(+), 8 deletions(-) + +--- a/fs/ecryptfs/inode.c ++++ b/fs/ecryptfs/inode.c +@@ -464,6 +464,7 @@ static int ecryptfs_symlink(struct inode + { + int rc; + struct dentry *lower_dentry; ++ struct vfsmount *lower_mnt; + struct dentry *lower_dir_dentry; + char *encoded_symname; + int encoded_symlen; +@@ -471,6 +472,7 @@ static int ecryptfs_symlink(struct inode + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + dget(lower_dentry); ++ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + lower_dir_dentry = lock_parent(lower_dentry); + encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname, + strlen(symname), +@@ -479,7 +481,7 @@ static int ecryptfs_symlink(struct inode + rc = encoded_symlen; + goto out_lock; + } +- rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, ++ rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, lower_mnt, + encoded_symname); + kfree(encoded_symname); + if (rc || !lower_dentry->d_inode) +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2347,7 +2347,8 @@ SYSCALL_DEFINE1(unlink, const char __use + return do_unlinkat(AT_FDCWD, pathname); + } + +-int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) ++int vfs_symlink(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt, ++ const char *oldname) + { + int error = may_create(dir, dentry, 0); + +@@ -2393,7 +2394,7 @@ SYSCALL_DEFINE3(symlinkat, const char __ + error = mnt_want_write(nd.path.mnt); + if (error) + goto out_dput; +- error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); ++ error = vfs_symlink(nd.path.dentry->d_inode, dentry, nd.path.mnt, from); + mnt_drop_write(nd.path.mnt); + out_dput: + dput(dentry); +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -1545,6 +1545,7 @@ nfsd_symlink(struct svc_rqst 
*rqstp, str + struct iattr *iap) + { + struct dentry *dentry, *dnew; ++ struct svc_export *exp; + __be32 err, cerr; + int host_err; + +@@ -1569,6 +1570,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str + if (host_err) + goto out_nfserr; + ++ exp = fhp->fh_export; + if (unlikely(path[plen] != 0)) { + char *path_alloced = kmalloc(plen+1, GFP_KERNEL); + if (path_alloced == NULL) +@@ -1576,14 +1578,16 @@ nfsd_symlink(struct svc_rqst *rqstp, str + else { + strncpy(path_alloced, path, plen); + path_alloced[plen] = 0; +- host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced); ++ host_err = vfs_symlink(dentry->d_inode, dnew, ++ exp->ex_path.mnt, path_alloced); + kfree(path_alloced); + } + } else +- host_err = vfs_symlink(dentry->d_inode, dnew, path); ++ host_err = vfs_symlink(dentry->d_inode, dnew, exp->ex_path.mnt, ++ path); + + if (!host_err) { +- if (EX_ISSYNC(fhp->fh_export)) ++ if (EX_ISSYNC(exp)) + host_err = nfsd_sync_dir(dentry); + } + err = nfserrno(host_err); +@@ -1591,7 +1595,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str + + mnt_drop_write(fhp->fh_export->ex_path.mnt); + +- cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); ++ cerr = fh_compose(resfhp, exp, dnew, fhp); + dput(dnew); + if (err==0) err = cerr; + out: +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1180,7 +1180,7 @@ extern int vfs_permission(struct nameida + extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); + extern int vfs_mkdir(struct inode *, struct dentry *, struct vfsmount *, int); + extern int vfs_mknod(struct inode *, struct dentry *, struct vfsmount *, int, dev_t); +-extern int vfs_symlink(struct inode *, struct dentry *, const char *); ++extern int vfs_symlink(struct inode *, struct dentry *, struct vfsmount *, const char *); + extern int vfs_link(struct dentry *, struct inode *, struct dentry *); + extern int vfs_rmdir(struct inode *, struct dentry *); + extern int vfs_unlink(struct inode *, struct dentry *); diff --git 
a/src/patches/suse-2.6.27.25/patches.apparmor/vfs-unlink.diff b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-unlink.diff new file mode 100644 index 0000000000..cbf092a2f5 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.apparmor/vfs-unlink.diff @@ -0,0 +1,99 @@ +From: Tony Jones +Subject: Add a struct vfsmount parameter to vfs_unlink() + +The vfsmount will be passed down to the LSM hook so that LSMs can compute +pathnames. + +Signed-off-by: Tony Jones +Signed-off-by: Andreas Gruenbacher +Signed-off-by: John Johansen + +--- + fs/ecryptfs/inode.c | 3 ++- + fs/namei.c | 4 ++-- + fs/nfsd/nfs4recover.c | 2 +- + fs/nfsd/vfs.c | 2 +- + include/linux/fs.h | 2 +- + ipc/mqueue.c | 2 +- + 6 files changed, 8 insertions(+), 7 deletions(-) + +--- a/fs/ecryptfs/inode.c ++++ b/fs/ecryptfs/inode.c +@@ -445,11 +445,12 @@ static int ecryptfs_unlink(struct inode + { + int rc = 0; + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); ++ struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); + struct dentry *lower_dir_dentry; + + lower_dir_dentry = lock_parent(lower_dentry); +- rc = vfs_unlink(lower_dir_inode, lower_dentry); ++ rc = vfs_unlink(lower_dir_inode, lower_dentry, lower_mnt); + if (rc) { + printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); + goto out_unlock; +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2248,7 +2248,7 @@ SYSCALL_DEFINE1(rmdir, const char __user + return do_rmdir(AT_FDCWD, pathname); + } + +-int vfs_unlink(struct inode *dir, struct dentry *dentry) ++int vfs_unlink(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt) + { + int error = may_delete(dir, dentry, 0); + +@@ -2313,7 +2313,7 @@ static long do_unlinkat(int dfd, const c + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit2; +- error = vfs_unlink(nd.path.dentry->d_inode, dentry); ++ error = vfs_unlink(nd.path.dentry->d_inode, dentry, nd.path.mnt); + mnt_drop_write(nd.path.mnt); + exit2: + 
dput(dentry); +--- a/fs/nfsd/nfs4recover.c ++++ b/fs/nfsd/nfs4recover.c +@@ -264,7 +264,7 @@ nfsd4_remove_clid_file(struct dentry *di + return -EINVAL; + } + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); +- status = vfs_unlink(dir->d_inode, dentry); ++ status = vfs_unlink(dir->d_inode, dentry, rec_dir.path.mnt); + mutex_unlock(&dir->d_inode->i_mutex); + return status; + } +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -1832,7 +1832,7 @@ nfsd_unlink(struct svc_rqst *rqstp, stru + host_err = -EPERM; + } else + #endif +- host_err = vfs_unlink(dirp, rdentry); ++ host_err = vfs_unlink(dirp, rdentry, exp->ex_path.mnt); + } else { /* It's RMDIR */ + host_err = vfs_rmdir(dirp, rdentry, exp->ex_path.mnt); + } +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1183,7 +1183,7 @@ extern int vfs_mknod(struct inode *, str + extern int vfs_symlink(struct inode *, struct dentry *, struct vfsmount *, const char *); + extern int vfs_link(struct dentry *, struct vfsmount *, struct inode *, struct dentry *, struct vfsmount *); + extern int vfs_rmdir(struct inode *, struct dentry *, struct vfsmount *); +-extern int vfs_unlink(struct inode *, struct dentry *); ++extern int vfs_unlink(struct inode *, struct dentry *, struct vfsmount *); + extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); + + /* +--- a/ipc/mqueue.c ++++ b/ipc/mqueue.c +@@ -746,7 +746,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __ + err = mnt_want_write(mqueue_mnt); + if (err) + goto out_err; +- err = vfs_unlink(dentry->d_parent->d_inode, dentry); ++ err = vfs_unlink(dentry->d_parent->d_inode, dentry, mqueue_mnt); + mnt_drop_write(mqueue_mnt); + out_err: + dput(dentry); diff --git a/src/patches/suse-2.6.27.25/patches.arch/ia64-node_mem_map-node_start_pfn.diff b/src/patches/suse-2.6.27.25/patches.arch/ia64-node_mem_map-node_start_pfn.diff new file mode 100644 index 0000000000..8835c46f22 --- /dev/null +++ 
b/src/patches/suse-2.6.27.25/patches.arch/ia64-node_mem_map-node_start_pfn.diff @@ -0,0 +1,48 @@ +From: Bernhard Walle +Subject: [PATCH] Fix memory map for ia64/discontmem for kdump + +makedumpfile[1] cannot run on ia64 discontigmem kernel, because the member +node_mem_map of struct pgdat_list has invalid value. This patch fixes it. + +node_start_pfn shows the start pfn of each node, and node_mem_map should +point 'struct page' of each node's node_start_pfn. +On my machine, node0's node_start_pfn shows 0x400 and its node_mem_map points +0xa0007fffbf000000. This address is the same as vmem_map, so the node_mem_map +points 'struct page' of pfn 0, even if its node_start_pfn shows 0x400. + +The cause is due to the round down of min_pfn in count_node_pages() and +node0's node_mem_map points 'struct page' of inactive pfn (0x0). +This patch fixes it. + + +makedumpfile[1]: dump filtering command +https://sourceforge.net/projects/makedumpfile/ + +Signed-off-by: Ken'ichi Ohmichi +Acked-by: Bernhard Walle + +--- + arch/ia64/include/asm/meminit.h | 1 - + arch/ia64/mm/discontig.c | 1 - + 2 files changed, 2 deletions(-) + +--- a/arch/ia64/include/asm/meminit.h ++++ b/arch/ia64/include/asm/meminit.h +@@ -47,7 +47,6 @@ extern int reserve_elfcorehdr(unsigned l + */ + #define GRANULEROUNDDOWN(n) ((n) & ~(IA64_GRANULE_SIZE-1)) + #define GRANULEROUNDUP(n) (((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1)) +-#define ORDERROUNDDOWN(n) ((n) & ~((PAGE_SIZE<>PAGE_SHIFT; + #endif + start = GRANULEROUNDDOWN(start); +- start = ORDERROUNDDOWN(start); + end = GRANULEROUNDUP(end); + mem_data[node].max_pfn = max(mem_data[node].max_pfn, + end >> PAGE_SHIFT); diff --git a/src/patches/suse-2.6.27.25/patches.arch/ppc-axon-missing-msi-workaround-5.diff b/src/patches/suse-2.6.27.25/patches.arch/ppc-axon-missing-msi-workaround-5.diff new file mode 100644 index 0000000000..29d11c4788 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.arch/ppc-axon-missing-msi-workaround-5.diff @@ -0,0 +1,143 @@ 
+From: Arnd Bergmann +Subject: powerpc/cell/axon-msi: retry on missing interrupt +References: bnc#445964,bnc#467633 + +The MSI capture logic on the axon bridge can sometimes +lose interrupts in case of high DMA and interrupt load, +when it signals an MSI interrupt to the MPIC interrupt +controller while we are already handling another MSI. + +Each MSI vector gets written into a FIFO buffer in main +memory using DMA, and that DMA access is normally flushed +by the actual interrupt packet on the IOIF. An MMIO +register in the MSIC holds the position of the last +entry in the FIFO buffer that was written. However, +reading that position does not flush the DMA, so that +we can observe stale data in the buffer. + +In a stress test, we have observed the DMA to arrive +up to 14 microseconds after reading the register. +We can reliably detect this conditioning by writing +an invalid MSI vector into the FIFO buffer after +reading from it, assuming that all MSIs we get +are valid. After detecting an invalid MSI vector, +we udelay(1) in the interrupt cascade for up to +100 times before giving up. + +Signed-off-by: Arnd Bergmann +Acked-by: John Jolly + +commit 23e0e8afafd9ac065d81506524adf3339584044b +Author: Arnd Bergmann +Date: Fri Dec 12 09:19:50 2008 +0000 + + powerpc/cell/axon-msi: Fix MSI after kexec + + Commit d015fe995 'powerpc/cell/axon-msi: Retry on missing interrupt' + has turned a rare failure to kexec on QS22 into a reproducible + error, which we have now analysed. + + The problem is that after a kexec, the MSIC hardware still points + into the middle of the old ring buffer. We set up the ring buffer + during reboot, but not the offset into it. On older kernels, this + would cause a storm of thousands of spurious interrupts after a + kexec, which would most of the time get dropped silently. + + With the new code, we time out on each interrupt, waiting for + it to become valid. 
If more interrupts come in that we time + out on, this goes on indefinitely, which eventually leads to + a hard crash. + + The solution in this commit is to read the current offset from + the MSIC when reinitializing it. This now works correctly, as + expected. + + Reported-by: Dirk Herrendoerfer + Signed-off-by: Arnd Bergmann + Acked-by: Michael Ellerman + Signed-off-by: Paul Mackerras + + +--- + arch/powerpc/platforms/cell/axon_msi.c | 39 ++++++++++++++++++++++++++++----- + 1 file changed, 34 insertions(+), 5 deletions(-) + +--- a/arch/powerpc/platforms/cell/axon_msi.c ++++ b/arch/powerpc/platforms/cell/axon_msi.c +@@ -95,6 +95,7 @@ static void axon_msi_cascade(unsigned in + struct axon_msic *msic = get_irq_data(irq); + u32 write_offset, msi; + int idx; ++ int retry = 0; + + write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG); + pr_debug("axon_msi: original write_offset 0x%x\n", write_offset); +@@ -102,7 +103,7 @@ static void axon_msi_cascade(unsigned in + /* write_offset doesn't wrap properly, so we have to mask it */ + write_offset &= MSIC_FIFO_SIZE_MASK; + +- while (msic->read_offset != write_offset) { ++ while (msic->read_offset != write_offset && retry < 100) { + idx = msic->read_offset / sizeof(__le32); + msi = le32_to_cpu(msic->fifo_virt[idx]); + msi &= 0xFFFF; +@@ -110,13 +111,37 @@ static void axon_msi_cascade(unsigned in + pr_debug("axon_msi: woff %x roff %x msi %x\n", + write_offset, msic->read_offset, msi); + ++ if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) { ++ generic_handle_irq(msi); ++ msic->fifo_virt[idx] = cpu_to_le32(0xffffffff); ++ } else { ++ /* ++ * Reading the MSIC_WRITE_OFFSET_REG does not ++ * reliably flush the outstanding DMA to the ++ * FIFO buffer. Here we were reading stale ++ * data, so we need to retry. 
++ */ ++ udelay(1); ++ retry++; ++ pr_debug("axon_msi: invalid irq 0x%x!\n", msi); ++ continue; ++ } ++ ++ if (retry) { ++ pr_debug("axon_msi: late irq 0x%x, retry %d\n", ++ msi, retry); ++ retry = 0; ++ } ++ + msic->read_offset += MSIC_FIFO_ENTRY_SIZE; + msic->read_offset &= MSIC_FIFO_SIZE_MASK; ++ } + +- if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) +- generic_handle_irq(msi); +- else +- pr_debug("axon_msi: invalid irq 0x%x!\n", msi); ++ if (retry) { ++ printk(KERN_WARNING "axon_msi: irq timed out\n"); ++ ++ msic->read_offset += MSIC_FIFO_ENTRY_SIZE; ++ msic->read_offset &= MSIC_FIFO_SIZE_MASK; + } + + desc->chip->eoi(irq); +@@ -364,6 +389,7 @@ static int axon_msi_probe(struct of_devi + dn->full_name); + goto out_free_fifo; + } ++ memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES); + + msic->irq_host = irq_alloc_host(dn, IRQ_HOST_MAP_NOMAP, + NR_IRQS, &msic_host_ops, 0); +@@ -387,6 +413,9 @@ static int axon_msi_probe(struct of_devi + MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE | + MSIC_CTRL_FIFO_SIZE); + ++ msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG) ++ & MSIC_FIFO_SIZE_MASK; ++ + device->dev.platform_data = msic; + + ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; diff --git a/src/patches/suse-2.6.27.25/patches.arch/ppc-dynamic-reconfiguration.diff b/src/patches/suse-2.6.27.25/patches.arch/ppc-dynamic-reconfiguration.diff new file mode 100644 index 0000000000..c697695be2 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.arch/ppc-dynamic-reconfiguration.diff @@ -0,0 +1,215 @@ +From: Chandru +Date: Sat, 30 Aug 2008 00:28:16 +1000 +Subject: [PATCH] powerpc: Add support for dynamic reconfiguration memory in kexec/kdump kernels +References: bnc#431492 +X-Git-Commit: cf00085d8045cddd80a8aabad97de96fa8131793 Mon Sep 17 00:00:00 2001 + +Kdump kernel needs to use only those memory regions that it is allowed +to use (crashkernel, rtas, tce, etc.). 
Each of these regions have +their own sizes and are currently added under 'linux,usable-memory' +property under each memory@xxx node of the device tree. + +The ibm,dynamic-memory property of ibm,dynamic-reconfiguration-memory +node (on POWER6) now stores in it the representation for most of the +logical memory blocks with the size of each memory block being a +constant (lmb_size). If one or more or part of the above mentioned +regions lie under one of the lmb from ibm,dynamic-memory property, +there is a need to identify those regions within the given lmb. + +This makes the kernel recognize a new 'linux,drconf-usable-memory' +property added by kexec-tools. Each entry in this property is of the +form of a count followed by that many (base, size) pairs for the above +mentioned regions. The number of cells in the count value is given by +the #size-cells property of the root node. + +Signed-off-by: Chandru Siddalingappa +Signed-off-by: Paul Mackerras +Acked-by: Bernhard Walle + +--- + arch/powerpc/kernel/prom.c | 40 +++++++++++++++++++--- + arch/powerpc/mm/numa.c | 79 +++++++++++++++++++++++++++++++++++---------- + 2 files changed, 96 insertions(+), 23 deletions(-) + +--- a/arch/powerpc/kernel/prom.c ++++ b/arch/powerpc/kernel/prom.c +@@ -888,9 +888,10 @@ static u64 __init dt_mem_next_cell(int s + */ + static int __init early_init_dt_scan_drconf_memory(unsigned long node) + { +- cell_t *dm, *ls; ++ cell_t *dm, *ls, *usm; + unsigned long l, n, flags; + u64 base, size, lmb_size; ++ unsigned int is_kexec_kdump = 0, rngs; + + ls = (cell_t *)of_get_flat_dt_prop(node, "ibm,lmb-size", &l); + if (ls == NULL || l < dt_root_size_cells * sizeof(cell_t)) +@@ -905,6 +906,12 @@ static int __init early_init_dt_scan_drc + if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(cell_t)) + return 0; + ++ /* check if this is a kexec/kdump kernel. 
*/ ++ usm = (cell_t *)of_get_flat_dt_prop(node, "linux,drconf-usable-memory", ++ &l); ++ if (usm != NULL) ++ is_kexec_kdump = 1; ++ + for (; n != 0; --n) { + base = dt_mem_next_cell(dt_root_addr_cells, &dm); + flags = dm[3]; +@@ -915,13 +922,34 @@ static int __init early_init_dt_scan_drc + if ((flags & 0x80) || !(flags & 0x8)) + continue; + size = lmb_size; +- if (iommu_is_off) { +- if (base >= 0x80000000ul) ++ rngs = 1; ++ if (is_kexec_kdump) { ++ /* ++ * For each lmb in ibm,dynamic-memory, a corresponding ++ * entry in linux,drconf-usable-memory property contains ++ * a counter 'p' followed by 'p' (base, size) duple. ++ * Now read the counter from ++ * linux,drconf-usable-memory property ++ */ ++ rngs = dt_mem_next_cell(dt_root_size_cells, &usm); ++ if (!rngs) /* there are no (base, size) duple */ + continue; +- if ((base + size) > 0x80000000ul) +- size = 0x80000000ul - base; + } +- lmb_add(base, size); ++ do { ++ if (is_kexec_kdump) { ++ base = dt_mem_next_cell(dt_root_addr_cells, ++ &usm); ++ size = dt_mem_next_cell(dt_root_size_cells, ++ &usm); ++ } ++ if (iommu_is_off) { ++ if (base >= 0x80000000ul) ++ continue; ++ if ((base + size) > 0x80000000ul) ++ size = 0x80000000ul - base; ++ } ++ lmb_add(base, size); ++ } while (--rngs); + } + lmb_dump_all(); + return 0; +--- a/arch/powerpc/mm/numa.c ++++ b/arch/powerpc/mm/numa.c +@@ -192,6 +192,21 @@ static const int *of_get_associativity(s + return of_get_property(dev, "ibm,associativity", NULL); + } + ++/* ++ * Returns the property linux,drconf-usable-memory if ++ * it exists (the property exists only in kexec/kdump kernels, ++ * added by kexec-tools) ++ */ ++static const u32 *of_get_usable_memory(struct device_node *memory) ++{ ++ const u32 *prop; ++ u32 len; ++ prop = of_get_property(memory, "linux,drconf-usable-memory", &len); ++ if (!prop || len < sizeof(unsigned int)) ++ return 0; ++ return prop; ++} ++ + /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa + * info is found. 
+ */ +@@ -529,14 +544,29 @@ static unsigned long __init numa_enforce + } + + /* ++ * Reads the counter for a given entry in ++ * linux,drconf-usable-memory property ++ */ ++static inline int __init read_usm_ranges(const u32 **usm) ++{ ++ /* ++ * For each lmb in ibm,dynamic-memory a corresponding ++ * entry in linux,drconf-usable-memory property contains ++ * a counter followed by that many (base, size) duple. ++ * read the counter from linux,drconf-usable-memory ++ */ ++ return read_n_cells(n_mem_size_cells, usm); ++} ++ ++/* + * Extract NUMA information from the ibm,dynamic-reconfiguration-memory + * node. This assumes n_mem_{addr,size}_cells have been set. + */ + static void __init parse_drconf_memory(struct device_node *memory) + { +- const u32 *dm; +- unsigned int n, rc; +- unsigned long lmb_size, size; ++ const u32 *dm, *usm; ++ unsigned int n, rc, ranges, is_kexec_kdump = 0; ++ unsigned long lmb_size, base, size, sz; + int nid; + struct assoc_arrays aa; + +@@ -552,6 +582,11 @@ static void __init parse_drconf_memory(s + if (rc) + return; + ++ /* check if this is a kexec/kdump kernel */ ++ usm = of_get_usable_memory(memory); ++ if (usm != NULL) ++ is_kexec_kdump = 1; ++ + for (; n != 0; --n) { + struct of_drconf_cell drmem; + +@@ -563,21 +598,31 @@ static void __init parse_drconf_memory(s + || !(drmem.flags & DRCONF_MEM_ASSIGNED)) + continue; + +- nid = of_drconf_to_nid_single(&drmem, &aa); +- +- fake_numa_create_new_node( +- ((drmem.base_addr + lmb_size) >> PAGE_SHIFT), ++ base = drmem.base_addr; ++ size = lmb_size; ++ ranges = 1; ++ ++ if (is_kexec_kdump) { ++ ranges = read_usm_ranges(&usm); ++ if (!ranges) /* there are no (base, size) duple */ ++ continue; ++ } ++ do { ++ if (is_kexec_kdump) { ++ base = read_n_cells(n_mem_addr_cells, &usm); ++ size = read_n_cells(n_mem_size_cells, &usm); ++ } ++ nid = of_drconf_to_nid_single(&drmem, &aa); ++ fake_numa_create_new_node( ++ ((base + size) >> PAGE_SHIFT), + &nid); +- +- node_set_online(nid); +- +- size = 
numa_enforce_memory_limit(drmem.base_addr, lmb_size); +- if (!size) +- continue; +- +- add_active_range(nid, drmem.base_addr >> PAGE_SHIFT, +- (drmem.base_addr >> PAGE_SHIFT) +- + (size >> PAGE_SHIFT)); ++ node_set_online(nid); ++ sz = numa_enforce_memory_limit(base, size); ++ if (sz) ++ add_active_range(nid, base >> PAGE_SHIFT, ++ (base >> PAGE_SHIFT) ++ + (sz >> PAGE_SHIFT)); ++ } while (--ranges); + } + } + diff --git a/src/patches/suse-2.6.27.25/patches.arch/ppc-vmcoreinfo.diff b/src/patches/suse-2.6.27.25/patches.arch/ppc-vmcoreinfo.diff new file mode 100644 index 0000000000..1ab7978d5e --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.arch/ppc-vmcoreinfo.diff @@ -0,0 +1,41 @@ +Date: Thu, 9 Oct 2008 11:20:27 -0400 +From: Neil Horman +To: linux-kernel@vger.kernel.org, kexec@lists.infradead.org, + vgoyal@redhat.com, hbabu@us.ibm.com +Subject: [PATCH] add additional symbols to /sys/kernel/vmcoreinfo data for + ppc(64) +Cc: nhorman@tuxdriver.com + +Hey- + The makdumpdile dump filtering program, in some modes of operation needs +the node_data and/or contig_page_data symbols to function properly. These +symbols are missing from the powerpc kernel. This patch adds those symbols in +properly. Tested successfully by myself and the reporter. + +Regards +Neil + +Signed-off-by: Neil Horman +Acked-by: Bernhard Walle + + arch/powerpc/kernel/machine_kexec.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + + +--- a/arch/powerpc/kernel/machine_kexec.c ++++ b/arch/powerpc/kernel/machine_kexec.c +@@ -44,6 +44,14 @@ void machine_kexec_cleanup(struct kimage + ppc_md.machine_kexec_cleanup(image); + } + ++void arch_crash_save_vmcoreinfo(void) ++{ ++#ifdef CONFIG_NEED_MULTIPLE_NODES ++ VMCOREINFO_SYMBOL(node_data); ++ VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); ++#endif ++} ++ + /* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. 
diff --git a/src/patches/suse-2.6.27.25/patches.arch/s390-04-06-cio-sac-update.diff b/src/patches/suse-2.6.27.25/patches.arch/s390-04-06-cio-sac-update.diff new file mode 100644 index 0000000000..8ce3f377d8 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.arch/s390-04-06-cio-sac-update.diff @@ -0,0 +1,28 @@ +From: Gerald Schaefer +Subject: cio: update sac values +References: bnc#445100 + +Symptom: Drivers based on fcx fail to start I/O. +Problem: Values for the sac field have changed. +Solution: Update code accordingly. + +Acked-by: John Jolly +--- + arch/s390/include/asm/fcx.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +Index: linux-sles11/arch/s390/include/asm/fcx.h +=================================================================== +--- linux-sles11.orig/arch/s390/include/asm/fcx.h ++++ linux-sles11/arch/s390/include/asm/fcx.h +@@ -248,8 +248,8 @@ struct dcw { + #define TCCB_MAX_SIZE (sizeof(struct tccb_tcah) + \ + TCCB_MAX_DCW * sizeof(struct dcw) + \ + sizeof(struct tccb_tcat)) +-#define TCCB_SAC_DEFAULT 0xf901 +-#define TCCB_SAC_INTRG 0xf902 ++#define TCCB_SAC_DEFAULT 0x1ffe ++#define TCCB_SAC_INTRG 0x1fff + + /** + * struct tccb_tcah - Transport-Command-Area Header (TCAH) diff --git a/src/patches/suse-2.6.27.25/patches.arch/s390-07-01-zfcp-port-failed-message.diff b/src/patches/suse-2.6.27.25/patches.arch/s390-07-01-zfcp-port-failed-message.diff new file mode 100644 index 0000000000..80cc7c946d --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.arch/s390-07-01-zfcp-port-failed-message.diff @@ -0,0 +1,73 @@ +From: Gerald Schaefer +Subject: zfcp: Remove message for failed port +References: bnc#464466 + +Symptom: During opening of an adapter the message "Remote port ... + could not be opened" is emitted for initiator ports, + confusing users. +Problem: The port scan tries to open all ports, including + initiator ports to determine if they are target ports. 
+ Sometimes, a different error status is returned for the + initiator ports, triggering the message mentioned above. +Solution: Remove the message, target port failures will be checked + later in the error recovery, printing a different message + if necessary. + +Acked-by: John Jolly +--- + Documentation/kmsg/s390/zfcp | 15 --------------- + drivers/s390/scsi/zfcp_dbf.c | 2 +- + drivers/s390/scsi/zfcp_fsf.c | 6 ------ + 3 files changed, 1 insertion(+), 22 deletions(-) + +--- a/Documentation/kmsg/s390/zfcp 2008-12-19 13:18:45.000000000 +0100 ++++ b/Documentation/kmsg/s390/zfcp 2008-12-19 13:18:59.000000000 +0100 +@@ -677,21 +677,6 @@ + */ + + /*? +- * Text: "%s: Remote port 0x%016Lx could not be opened\n" +- * Severity: Warning +- * Parameter: +- * @1: bus ID of the zfcp device +- * @2: WWPN +- * Description: +- * The FCP adapter rejected a request to open the specified port. No retry +- * is possible. +- * User action: +- * Verify the setup and try removing and adding the port again. If this +- * problem persists, gather Linux debug data, collect the FCP adapter +- * hardware logs, and report the problem to your support organization. +- */ +- +-/*? 
+ * Text: "%s: LUN 0x%Lx on port 0x%Lx is already in use by CSS%d, MIF Image ID %x\n" + * Severity: Warning + * Parameter: +--- a/drivers/s390/scsi/zfcp_dbf.c 2008-12-19 13:18:45.000000000 +0100 ++++ b/drivers/s390/scsi/zfcp_dbf.c 2008-12-19 13:18:59.000000000 +0100 +@@ -521,7 +521,7 @@ static const char *zfcp_rec_dbf_ids[] = + [29] = "link down", + [30] = "link up status read", + [31] = "open port failed", +- [32] = "open port failed", ++ [32] = "", + [33] = "close port", + [34] = "open unit failed", + [35] = "exclusive open unit failed", +--- a/drivers/s390/scsi/zfcp_fsf.c 2008-12-19 13:18:45.000000000 +0100 ++++ b/drivers/s390/scsi/zfcp_fsf.c 2008-12-19 13:18:59.000000000 +0100 +@@ -1405,13 +1405,7 @@ static void zfcp_fsf_open_port_handler(s + switch (header->fsf_status_qual.word[0]) { + case FSF_SQ_INVOKE_LINK_TEST_PROCEDURE: + case FSF_SQ_ULP_DEPENDENT_ERP_REQUIRED: +- req->status |= ZFCP_STATUS_FSFREQ_ERROR; +- break; + case FSF_SQ_NO_RETRY_POSSIBLE: +- dev_warn(&req->adapter->ccw_device->dev, +- "Remote port 0x%016Lx could not be opened\n", +- (unsigned long long)port->wwpn); +- zfcp_erp_port_failed(port, 32, req); + req->status |= ZFCP_STATUS_FSFREQ_ERROR; + break; + } diff --git a/src/patches/suse-2.6.27.25/patches.arch/s390-07-02-zfcp-unchained-fsf.diff b/src/patches/suse-2.6.27.25/patches.arch/s390-07-02-zfcp-unchained-fsf.diff new file mode 100644 index 0000000000..25452abb3e --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.arch/s390-07-02-zfcp-unchained-fsf.diff @@ -0,0 +1,378 @@ +From: Gerald Schaefer +Subject: zfcp: Add support for unchained FSF requests +References: bnc#464466 + +Symptom: On a z900 zfcp loops in error recovery. +Problem: The z900 requires support for unchained FSF requests for + CT and ELS requests. The chained format triggers the ERP + from the qdio error handler. +Solution: Check the hardware feature flag and send unchained CT + and ELS requests if chaining is not support. 
Adapt the + size of the GPN_FT request as necessary and add debug data + and a warning, in case the CT request hits a limit. + +Acked-by: John Jolly +--- + Documentation/kmsg/s390/zfcp | 16 ++++++++++++ + drivers/s390/scsi/zfcp_dbf.c | 2 + + drivers/s390/scsi/zfcp_dbf.h | 1 + drivers/s390/scsi/zfcp_def.h | 9 ------- + drivers/s390/scsi/zfcp_fc.c | 55 ++++++++++++++++++++++++------------------- + drivers/s390/scsi/zfcp_fsf.c | 32 +++++++++++++++++++------ + drivers/s390/scsi/zfcp_fsf.h | 2 + + 7 files changed, 77 insertions(+), 40 deletions(-) + +--- a/drivers/s390/scsi/zfcp_fc.c 2008-12-19 13:36:23.000000000 +0100 ++++ b/drivers/s390/scsi/zfcp_fc.c 2008-12-19 13:36:27.000000000 +0100 +@@ -25,9 +25,12 @@ struct gpn_ft_resp_acc { + u64 wwpn; + } __attribute__ ((packed)); + +-#define ZFCP_GPN_FT_ENTRIES ((PAGE_SIZE - sizeof(struct ct_hdr)) \ +- / sizeof(struct gpn_ft_resp_acc)) ++#define ZFCP_CT_SIZE_ONE_PAGE (PAGE_SIZE - sizeof(struct ct_hdr)) ++#define ZFCP_GPN_FT_ENTRIES (ZFCP_CT_SIZE_ONE_PAGE \ ++ / sizeof(struct gpn_ft_resp_acc)) + #define ZFCP_GPN_FT_BUFFERS 4 ++#define ZFCP_GPN_FT_MAX_SIZE (ZFCP_GPN_FT_BUFFERS * PAGE_SIZE \ ++ - sizeof(struct ct_hdr)) + #define ZFCP_GPN_FT_MAX_ENTRIES ZFCP_GPN_FT_BUFFERS * (ZFCP_GPN_FT_ENTRIES + 1) + + struct ct_iu_gpn_ft_resp { +@@ -283,8 +286,6 @@ int static zfcp_fc_ns_gid_pn_request(str + gid_pn->ct.timeout = ZFCP_NS_GID_PN_TIMEOUT; + gid_pn->ct.req = &gid_pn->req; + gid_pn->ct.resp = &gid_pn->resp; +- gid_pn->ct.req_count = 1; +- gid_pn->ct.resp_count = 1; + sg_init_one(&gid_pn->req, &gid_pn->ct_iu_req, + sizeof(struct ct_iu_gid_pn_req)); + sg_init_one(&gid_pn->resp, &gid_pn->ct_iu_resp, +@@ -296,7 +297,7 @@ int static zfcp_fc_ns_gid_pn_request(str + gid_pn->ct_iu_req.header.gs_subtype = ZFCP_CT_NAME_SERVER; + gid_pn->ct_iu_req.header.options = ZFCP_CT_SYNCHRONOUS; + gid_pn->ct_iu_req.header.cmd_rsp_code = ZFCP_CT_GID_PN; +- gid_pn->ct_iu_req.header.max_res_size = ZFCP_CT_MAX_SIZE; ++ gid_pn->ct_iu_req.header.max_res_size = 
ZFCP_CT_SIZE_ONE_PAGE / 4; + gid_pn->ct_iu_req.wwpn = erp_action->port->wwpn; + + init_completion(&compl_rec.done); +@@ -406,8 +407,6 @@ static int zfcp_fc_adisc(struct zfcp_por + sg_init_one(adisc->els.resp, &adisc->ls_adisc_acc, + sizeof(struct zfcp_ls_adisc)); + +- adisc->els.req_count = 1; +- adisc->els.resp_count = 1; + adisc->els.adapter = adapter; + adisc->els.port = port; + adisc->els.d_id = port->d_id; +@@ -447,17 +446,17 @@ void zfcp_test_link(struct zfcp_port *po + zfcp_erp_port_forced_reopen(port, 0, 65, NULL); + } + +-static void zfcp_free_sg_env(struct zfcp_gpn_ft *gpn_ft) ++static void zfcp_free_sg_env(struct zfcp_gpn_ft *gpn_ft, int buf_num) + { + struct scatterlist *sg = &gpn_ft->sg_req; + + kfree(sg_virt(sg)); /* free request buffer */ +- zfcp_sg_free_table(gpn_ft->sg_resp, ZFCP_GPN_FT_BUFFERS); ++ zfcp_sg_free_table(gpn_ft->sg_resp, buf_num); + + kfree(gpn_ft); + } + +-static struct zfcp_gpn_ft *zfcp_alloc_sg_env(void) ++static struct zfcp_gpn_ft *zfcp_alloc_sg_env(int buf_num) + { + struct zfcp_gpn_ft *gpn_ft; + struct ct_iu_gpn_ft_req *req; +@@ -474,8 +473,8 @@ static struct zfcp_gpn_ft *zfcp_alloc_sg + } + sg_init_one(&gpn_ft->sg_req, req, sizeof(*req)); + +- if (zfcp_sg_setup_table(gpn_ft->sg_resp, ZFCP_GPN_FT_BUFFERS)) { +- zfcp_free_sg_env(gpn_ft); ++ if (zfcp_sg_setup_table(gpn_ft->sg_resp, buf_num)) { ++ zfcp_free_sg_env(gpn_ft, buf_num); + gpn_ft = NULL; + } + out: +@@ -484,7 +483,8 @@ out: + + + static int zfcp_scan_issue_gpn_ft(struct zfcp_gpn_ft *gpn_ft, +- struct zfcp_adapter *adapter) ++ struct zfcp_adapter *adapter, ++ int max_bytes) + { + struct zfcp_send_ct *ct = &gpn_ft->ct; + struct ct_iu_gpn_ft_req *req = sg_virt(&gpn_ft->sg_req); +@@ -497,8 +497,7 @@ static int zfcp_scan_issue_gpn_ft(struct + req->header.gs_subtype = ZFCP_CT_NAME_SERVER; + req->header.options = ZFCP_CT_SYNCHRONOUS; + req->header.cmd_rsp_code = ZFCP_CT_GPN_FT; +- req->header.max_res_size = (sizeof(struct gpn_ft_resp_acc) * +- (ZFCP_GPN_FT_MAX_ENTRIES - 1)) >> 
2; ++ req->header.max_res_size = max_bytes / 4; + req->flags = 0; + req->domain_id_scope = 0; + req->area_id_scope = 0; +@@ -511,8 +510,6 @@ static int zfcp_scan_issue_gpn_ft(struct + ct->timeout = 10; + ct->req = &gpn_ft->sg_req; + ct->resp = gpn_ft->sg_resp; +- ct->req_count = 1; +- ct->resp_count = ZFCP_GPN_FT_BUFFERS; + + init_completion(&compl_rec.done); + compl_rec.handler = NULL; +@@ -539,7 +536,7 @@ static void zfcp_validate_port(struct zf + zfcp_port_dequeue(port); + } + +-static int zfcp_scan_eval_gpn_ft(struct zfcp_gpn_ft *gpn_ft) ++static int zfcp_scan_eval_gpn_ft(struct zfcp_gpn_ft *gpn_ft, int max_entries) + { + struct zfcp_send_ct *ct = &gpn_ft->ct; + struct scatterlist *sg = gpn_ft->sg_resp; +@@ -559,13 +556,17 @@ static int zfcp_scan_eval_gpn_ft(struct + return -EIO; + } + +- if (hdr->max_res_size) ++ if (hdr->max_res_size) { ++ dev_warn(&adapter->ccw_device->dev, ++ "The name server reported %d words residual data\n", ++ hdr->max_res_size); + return -E2BIG; ++ } + + down(&zfcp_data.config_sema); + + /* first entry is the header */ +- for (x = 1; x < ZFCP_GPN_FT_MAX_ENTRIES && !last; x++) { ++ for (x = 1; x < max_entries && !last; x++) { + if (x % (ZFCP_GPN_FT_ENTRIES + 1)) + acc++; + else +@@ -611,6 +612,12 @@ int zfcp_scan_ports(struct zfcp_adapter + { + int ret, i; + struct zfcp_gpn_ft *gpn_ft; ++ int chain, max_entries, buf_num, max_bytes; ++ ++ chain = adapter->adapter_features & FSF_FEATURE_ELS_CT_CHAINED_SBALS; ++ buf_num = chain ? ZFCP_GPN_FT_BUFFERS : 1; ++ max_entries = chain ? ZFCP_GPN_FT_MAX_ENTRIES : ZFCP_GPN_FT_ENTRIES; ++ max_bytes = chain ? 
ZFCP_GPN_FT_MAX_SIZE : ZFCP_CT_SIZE_ONE_PAGE; + + zfcp_erp_wait(adapter); /* wait until adapter is finished with ERP */ + if (fc_host_port_type(adapter->scsi_host) != FC_PORTTYPE_NPORT) +@@ -620,23 +627,23 @@ int zfcp_scan_ports(struct zfcp_adapter + if (ret) + return ret; + +- gpn_ft = zfcp_alloc_sg_env(); ++ gpn_ft = zfcp_alloc_sg_env(buf_num); + if (!gpn_ft) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < 3; i++) { +- ret = zfcp_scan_issue_gpn_ft(gpn_ft, adapter); ++ ret = zfcp_scan_issue_gpn_ft(gpn_ft, adapter, max_bytes); + if (!ret) { +- ret = zfcp_scan_eval_gpn_ft(gpn_ft); ++ ret = zfcp_scan_eval_gpn_ft(gpn_ft, max_entries); + if (ret == -EAGAIN) + ssleep(1); + else + break; + } + } +- zfcp_free_sg_env(gpn_ft); ++ zfcp_free_sg_env(gpn_ft, buf_num); + out: + zfcp_wka_port_put(&adapter->nsp); + return ret; +--- a/drivers/s390/scsi/zfcp_fsf.h 2008-12-19 13:36:23.000000000 +0100 ++++ b/drivers/s390/scsi/zfcp_fsf.h 2008-12-19 13:36:27.000000000 +0100 +@@ -164,6 +164,7 @@ + #define FSF_FEATURE_LUN_SHARING 0x00000004 + #define FSF_FEATURE_NOTIFICATION_LOST 0x00000008 + #define FSF_FEATURE_HBAAPI_MANAGEMENT 0x00000010 ++#define FSF_FEATURE_ELS_CT_CHAINED_SBALS 0x00000020 + #define FSF_FEATURE_UPDATE_ALERT 0x00000100 + #define FSF_FEATURE_MEASUREMENT_DATA 0x00000200 + +@@ -322,6 +323,7 @@ struct fsf_nport_serv_param { + u8 vendor_version_level[16]; + } __attribute__ ((packed)); + ++#define FSF_PLOGI_MIN_LEN 112 + struct fsf_plogi { + u32 code; + struct fsf_nport_serv_param serv_param; +--- a/drivers/s390/scsi/zfcp_fsf.c 2008-12-19 13:36:23.000000000 +0100 ++++ b/drivers/s390/scsi/zfcp_fsf.c 2008-12-19 13:36:27.000000000 +0100 +@@ -1012,12 +1012,29 @@ skip_fsfstatus: + send_ct->handler(send_ct->handler_data); + } + +-static int zfcp_fsf_setup_sbals(struct zfcp_fsf_req *req, +- struct scatterlist *sg_req, +- struct scatterlist *sg_resp, int max_sbals) ++static int zfcp_fsf_setup_ct_els_sbals(struct zfcp_fsf_req *req, ++ struct scatterlist *sg_req, ++ struct 
scatterlist *sg_resp, ++ int max_sbals) + { ++ struct qdio_buffer_element *sbale = zfcp_qdio_sbale_req(req); ++ u32 feat = req->adapter->adapter_features; + int bytes; + ++ if (!(feat & FSF_FEATURE_ELS_CT_CHAINED_SBALS)) { ++ if (sg_req->length > PAGE_SIZE || sg_resp->length > PAGE_SIZE || ++ !sg_is_last(sg_req) || !sg_is_last(sg_resp)) ++ return -EOPNOTSUPP; ++ ++ sbale[0].flags |= SBAL_FLAGS0_TYPE_WRITE_READ; ++ sbale[2].addr = sg_virt(sg_req); ++ sbale[2].length = sg_req->length; ++ sbale[3].addr = sg_virt(sg_resp); ++ sbale[3].length = sg_resp->length; ++ sbale[3].flags |= SBAL_FLAGS_LAST_ENTRY; ++ return 0; ++ } ++ + bytes = zfcp_qdio_sbals_from_sg(req, SBAL_FLAGS0_TYPE_WRITE_READ, + sg_req, max_sbals); + if (bytes <= 0) +@@ -1059,8 +1076,8 @@ int zfcp_fsf_send_ct(struct zfcp_send_ct + goto out; + } + +- ret = zfcp_fsf_setup_sbals(req, ct->req, ct->resp, +- FSF_MAX_SBALS_PER_REQ); ++ ret = zfcp_fsf_setup_ct_els_sbals(req, ct->req, ct->resp, ++ FSF_MAX_SBALS_PER_REQ); + if (ret) + goto failed_send; + +@@ -1170,7 +1187,7 @@ int zfcp_fsf_send_els(struct zfcp_send_e + goto out; + } + +- ret = zfcp_fsf_setup_sbals(req, els->req, els->resp, 2); ++ ret = zfcp_fsf_setup_ct_els_sbals(req, els->req, els->resp, 2); + + if (ret) + goto failed_send; +@@ -1433,7 +1450,8 @@ static void zfcp_fsf_open_port_handler(s + * Alternately, an ADISC/PDISC ELS should suffice, as well. 
+ */ + plogi = (struct fsf_plogi *) req->qtcb->bottom.support.els; +- if (req->qtcb->bottom.support.els1_length >= sizeof(*plogi)) { ++ if (req->qtcb->bottom.support.els1_length >= ++ FSF_PLOGI_MIN_LEN) { + if (plogi->serv_param.wwpn != port->wwpn) + atomic_clear_mask(ZFCP_STATUS_PORT_DID_DID, + &port->status); +--- a/Documentation/kmsg/s390/zfcp 2008-12-19 13:36:23.000000000 +0100 ++++ b/Documentation/kmsg/s390/zfcp 2008-12-19 13:36:27.000000000 +0100 +@@ -813,3 +813,19 @@ + * problem persists, gather Linux debug data, collect the FCP adapter + * hardware logs, and report the problem to your support organization. + */ ++ ++/*? ++ * Text: "%s: The name server reported %d words residual data\n" ++ * Severity: Warning ++ * Parameter: ++ * @1: bus ID of the zfcp device ++ * @2: number of words in residual data ++ * Description: ++ * The fibre channel name server sent too much information about remote ports. ++ * The zfcp device driver did not receive sufficient information to attach all ++ * available remote ports in the SAN. ++ * User action: ++ * Verify that you are running the latest firmware level on the FCP ++ * adapter. Check your SAN setup and consider reducing the number of ports ++ * visible to the FCP adapter by using more restrictive zoning in the SAN. 
++ */ +--- a/drivers/s390/scsi/zfcp_dbf.c 2008-12-19 13:36:23.000000000 +0100 ++++ b/drivers/s390/scsi/zfcp_dbf.c 2008-12-19 13:36:27.000000000 +0100 +@@ -935,6 +935,7 @@ void zfcp_san_dbf_event_ct_response(stru + rct->reason_code = hdr->reason_code; + rct->expl = hdr->reason_code_expl; + rct->vendor_unique = hdr->vendor_unique; ++ rct->max_res_size = hdr->max_res_size; + rct->len = min((int)ct->resp->length - (int)sizeof(struct ct_hdr), + ZFCP_DBF_SAN_MAX_PAYLOAD); + debug_event(adapter->san_dbf, level, r, sizeof(*r)); +@@ -1042,6 +1043,7 @@ static int zfcp_san_dbf_view_format(debu + zfcp_dbf_out(&p, "reason_code", "0x%02x", ct->reason_code); + zfcp_dbf_out(&p, "reason_code_expl", "0x%02x", ct->expl); + zfcp_dbf_out(&p, "vendor_unique", "0x%02x", ct->vendor_unique); ++ zfcp_dbf_out(&p, "max_res_size", "0x%04x", ct->max_res_size); + } else if (strncmp(r->tag, "oels", ZFCP_DBF_TAG_SIZE) == 0 || + strncmp(r->tag, "rels", ZFCP_DBF_TAG_SIZE) == 0 || + strncmp(r->tag, "iels", ZFCP_DBF_TAG_SIZE) == 0) { +--- a/drivers/s390/scsi/zfcp_dbf.h 2008-12-19 13:36:23.000000000 +0100 ++++ b/drivers/s390/scsi/zfcp_dbf.h 2008-12-19 13:36:27.000000000 +0100 +@@ -171,6 +171,7 @@ struct zfcp_san_dbf_record_ct_response { + u8 reason_code; + u8 expl; + u8 vendor_unique; ++ u16 max_res_size; + u32 len; + } __attribute__ ((packed)); + +--- a/drivers/s390/scsi/zfcp_def.h 2008-12-19 13:36:23.000000000 +0100 ++++ b/drivers/s390/scsi/zfcp_def.h 2008-12-19 13:36:27.000000000 +0100 +@@ -210,7 +210,6 @@ struct zfcp_ls_adisc { + #define ZFCP_CT_UNABLE_TO_PERFORM_CMD 0x09 + #define ZFCP_CT_GID_PN 0x0121 + #define ZFCP_CT_GPN_FT 0x0172 +-#define ZFCP_CT_MAX_SIZE 0x1020 + #define ZFCP_CT_ACCEPT 0x8002 + #define ZFCP_CT_REJECT 0x8001 + +@@ -339,8 +338,6 @@ struct ct_iu_gid_pn_resp { + * @wka_port: port where the request is sent to + * @req: scatter-gather list for request + * @resp: scatter-gather list for response +- * @req_count: number of elements in request scatter-gather list +- * @resp_count: 
number of elements in response scatter-gather list + * @handler: handler function (called for response to the request) + * @handler_data: data passed to handler function + * @timeout: FSF timeout for this request +@@ -351,8 +348,6 @@ struct zfcp_send_ct { + struct zfcp_wka_port *wka_port; + struct scatterlist *req; + struct scatterlist *resp; +- unsigned int req_count; +- unsigned int resp_count; + void (*handler)(unsigned long); + unsigned long handler_data; + int timeout; +@@ -377,8 +372,6 @@ struct zfcp_gid_pn_data { + * @d_id: destiniation id of port where request is sent to + * @req: scatter-gather list for request + * @resp: scatter-gather list for response +- * @req_count: number of elements in request scatter-gather list +- * @resp_count: number of elements in response scatter-gather list + * @handler: handler function (called for response to the request) + * @handler_data: data passed to handler function + * @completion: completion for synchronization purposes +@@ -391,8 +384,6 @@ struct zfcp_send_els { + u32 d_id; + struct scatterlist *req; + struct scatterlist *resp; +- unsigned int req_count; +- unsigned int resp_count; + void (*handler)(unsigned long); + unsigned long handler_data; + struct completion *completion; diff --git a/src/patches/suse-2.6.27.25/patches.arch/s390-07-03-topology-fix.diff b/src/patches/suse-2.6.27.25/patches.arch/s390-07-03-topology-fix.diff new file mode 100644 index 0000000000..c86c5e3fd7 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.arch/s390-07-03-topology-fix.diff @@ -0,0 +1,207 @@ +From: Gerald Schaefer +Subject: kernel: fix cpu topology support +References: bnc#464466 + +Symptom: CPU topology changes aren't recognized by the scheduler. +Problem: The common code scheduler used to have a hook which could be + called from architecture code to trigger a rebuild of all + scheduling domains when cpu topology changed. This hook got + removed errorneously. So cpu topology change notifications + got lost. 
+Solution: Readd the hook. This patch also removes some unused code + from the s390 specific cpu topology code. + +Acked-by: John Jolly +--- + arch/s390/kernel/topology.c | 35 ++++++++++------------------------- + include/linux/topology.h | 2 +- + kernel/sched.c | 16 +++++++++++++--- + 3 files changed, 24 insertions(+), 29 deletions(-) + +--- a/arch/s390/kernel/topology.c ++++ b/arch/s390/kernel/topology.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -64,7 +65,6 @@ static void topology_work_fn(struct work + static struct tl_info *tl_info; + static struct core_info core_info; + static int machine_has_topology; +-static int machine_has_topology_irq; + static struct timer_list topology_timer; + static void set_topology_timer(void); + static DECLARE_WORK(topology_work, topology_work_fn); +@@ -81,7 +81,7 @@ cpumask_t cpu_coregroup_map(unsigned int + + cpus_clear(mask); + if (!topology_enabled || !machine_has_topology) +- return cpu_present_map; ++ return cpu_possible_map; + spin_lock_irqsave(&topology_lock, flags); + while (core) { + if (cpu_isset(cpu, core->mask)) { +@@ -171,7 +171,7 @@ static void topology_update_polarization + int cpu; + + mutex_lock(&smp_cpu_state_mutex); +- for_each_present_cpu(cpu) ++ for_each_possible_cpu(cpu) + smp_cpu_polarization[cpu] = POLARIZATION_HRZ; + mutex_unlock(&smp_cpu_state_mutex); + } +@@ -202,7 +202,7 @@ int topology_set_cpu_management(int fc) + rc = ptf(PTF_HORIZONTAL); + if (rc) + return -EBUSY; +- for_each_present_cpu(cpu) ++ for_each_possible_cpu(cpu) + smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN; + return rc; + } +@@ -211,11 +211,11 @@ static void update_cpu_core_map(void) + { + int cpu; + +- for_each_present_cpu(cpu) ++ for_each_possible_cpu(cpu) + cpu_core_map[cpu] = cpu_coregroup_map(cpu); + } + +-void arch_update_cpu_topology(void) ++int arch_update_cpu_topology(void) + { + struct tl_info *info = tl_info; + struct sys_device *sysdev; +@@ -224,7 +224,7 @@ void 
arch_update_cpu_topology(void) + if (!machine_has_topology) { + update_cpu_core_map(); + topology_update_polarization_simple(); +- return; ++ return 0; + } + stsi(info, 15, 1, 2); + tl_to_cores(info); +@@ -233,11 +233,12 @@ void arch_update_cpu_topology(void) + sysdev = get_cpu_sysdev(cpu); + kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); + } ++ return 1; + } + + static void topology_work_fn(struct work_struct *work) + { +- arch_reinit_sched_domains(); ++ rebuild_sched_domains(); + } + + void topology_schedule_update(void) +@@ -260,11 +261,6 @@ static void set_topology_timer(void) + add_timer(&topology_timer); + } + +-static void topology_interrupt(__u16 code) +-{ +- schedule_work(&topology_work); +-} +- + static int __init early_parse_topology(char *p) + { + if (strncmp(p, "on", 2)) +@@ -284,14 +280,7 @@ static int __init init_topology_update(v + goto out; + } + init_timer_deferrable(&topology_timer); +- if (machine_has_topology_irq) { +- rc = register_external_interrupt(0x2005, topology_interrupt); +- if (rc) +- goto out; +- ctl_set_bit(0, 8); +- } +- else +- set_topology_timer(); ++ set_topology_timer(); + out: + update_cpu_core_map(); + return rc; +@@ -312,9 +301,6 @@ void __init s390_init_cpu_topology(void) + return; + machine_has_topology = 1; + +- if (facility_bits & (1ULL << 51)) +- machine_has_topology_irq = 1; +- + tl_info = alloc_bootmem_pages(PAGE_SIZE); + info = tl_info; + stsi(info, 15, 1, 2); +@@ -338,5 +324,4 @@ void __init s390_init_cpu_topology(void) + return; + error: + machine_has_topology = 0; +- machine_has_topology_irq = 0; + } +--- a/include/linux/topology.h ++++ b/include/linux/topology.h +@@ -49,7 +49,7 @@ + for_each_online_node(node) \ + if (nr_cpus_node(node)) + +-void arch_update_cpu_topology(void); ++int arch_update_cpu_topology(void); + + /* Conform to ACPI 2.0 SLIT distance definitions */ + #define LOCAL_DISTANCE 10 +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -7640,8 +7640,14 @@ static struct sched_domain_attr *dattr_c + */ + static 
cpumask_t fallback_doms; + +-void __attribute__((weak)) arch_update_cpu_topology(void) ++/* ++ * arch_update_cpu_topology lets virtualized architectures update the ++ * cpu core maps. It is supposed to return 1 if the topology changed ++ * or 0 if it stayed the same. ++ */ ++int __attribute__((weak)) arch_update_cpu_topology(void) + { ++ return 0; + } + + /* +@@ -7735,17 +7741,21 @@ void partition_sched_domains(int ndoms_n + struct sched_domain_attr *dattr_new) + { + int i, j, n; ++ int top_changed; + + mutex_lock(&sched_domains_mutex); + + /* always unregister in case we don't destroy any domains */ + unregister_sched_domain_sysctl(); + ++ /* Let architecture update cpu core mappings. */ ++ top_changed = arch_update_cpu_topology(); ++ + n = doms_new ? ndoms_new : 0; + + /* Destroy deleted domains */ + for (i = 0; i < ndoms_cur; i++) { +- for (j = 0; j < n; j++) { ++ for (j = 0; j < n && !top_changed; j++) { + if (cpus_equal(doms_cur[i], doms_new[j]) + && dattrs_equal(dattr_cur, i, dattr_new, j)) + goto match1; +@@ -7765,7 +7775,7 @@ match1: + + /* Build new domains */ + for (i = 0; i < ndoms_new; i++) { +- for (j = 0; j < ndoms_cur; j++) { ++ for (j = 0; j < ndoms_cur && !top_changed; j++) { + if (cpus_equal(doms_new[i], doms_cur[j]) + && dattrs_equal(dattr_new, i, dattr_cur, j)) + goto match2; diff --git a/src/patches/suse-2.6.27.25/patches.arch/s390-08-01-cio-fix-mp-mode.diff b/src/patches/suse-2.6.27.25/patches.arch/s390-08-01-cio-fix-mp-mode.diff new file mode 100644 index 0000000000..852e22b250 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.arch/s390-08-01-cio-fix-mp-mode.diff @@ -0,0 +1,61 @@ +From: Gerald Schaefer +Subject: cio: fix subchannel multipath mode setup +References: bnc#466462,LTC#51047 + +Symptom: Undefined behavior when trying to access DASD devices with more + than one CHPID: e.g. I/O errors due to timeouts after missing + interrupts, slow access to DASDs because single path mode is used. 
+Problem: Setup of subchannel multipath mode is not performed correctly + because changes to a local buffer are lost before they are sent + to the channel subsystem. In this state, the control unit assumes + multipath mode while the channel subsystem expects single path + mode. As a result, interrupts may not be correctly recognized + which leads to timeout situations and eventually I/O errors. + Also single path processing may slow down DASD access. +Solution: Apply changes to the subchannel configuration after modifying + the local buffer. + +Acked-by: John Jolly + +--- + drivers/s390/cio/device.c | 6 ++++++ + drivers/s390/cio/device_fsm.c | 2 ++ + 2 files changed, 8 insertions(+) + +Index: linux-sles11/drivers/s390/cio/device.c +=================================================================== +--- linux-sles11.orig/drivers/s390/cio/device.c ++++ linux-sles11/drivers/s390/cio/device.c +@@ -1246,6 +1246,9 @@ static int io_subchannel_probe(struct su + return 0; + } + io_subchannel_init_fields(sch); ++ rc = cio_modify(sch); ++ if (rc) ++ goto out_schedule; + /* + * First check if a fitting device may be found amongst the + * disconnected devices or in the orphanage. +@@ -1676,6 +1679,9 @@ static int ccw_device_console_enable(str + sch->private = cio_get_console_priv(); + memset(sch->private, 0, sizeof(struct io_subchannel_private)); + io_subchannel_init_fields(sch); ++ rc = cio_modify(sch); ++ if (rc) ++ return rc; + sch->driver = &io_subchannel_driver; + /* Initialize the ccw_device structure. 
*/ + cdev->dev.parent= &sch->dev; +Index: linux-sles11/drivers/s390/cio/device_fsm.c +=================================================================== +--- linux-sles11.orig/drivers/s390/cio/device_fsm.c ++++ linux-sles11/drivers/s390/cio/device_fsm.c +@@ -1028,6 +1028,8 @@ void ccw_device_trigger_reprobe(struct c + sch->schib.pmcw.ena = 0; + if ((sch->lpm & (sch->lpm - 1)) != 0) + sch->schib.pmcw.mp = 1; ++ if (cio_modify(sch)) ++ return; + /* We should also udate ssd info, but this has to wait. */ + /* Check if this is another device which appeared on the same sch. */ + if (sch->schib.pmcw.dev != cdev->private->dev_id.devno) { diff --git a/src/patches/suse-2.6.27.25/patches.arch/s390-08-02-zfcp-gpn-align-fix.diff b/src/patches/suse-2.6.27.25/patches.arch/s390-08-02-zfcp-gpn-align-fix.diff new file mode 100644 index 0000000000..d3a4403fec --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.arch/s390-08-02-zfcp-gpn-align-fix.diff @@ -0,0 +1,109 @@ +From: Gerald Schaefer +Subject: zfcp: fix memory alignment for GPN_FT requests. +References: bnc#466462 + +Symptom: An unexpected adapter reopen can be triggered in case + of a wrongly aligned GPN_FT nameserver request. +Problem: A request which is stored across a page is not allowed. + The standard memory allocation does not guarantee to have + all requested memory within one page. +Solution: Make sure the requested memory is always within one page. 
+ +Acked-by: John Jolly + +--- + drivers/s390/scsi/zfcp_aux.c | 7 +++++++ + drivers/s390/scsi/zfcp_def.h | 9 +++++++++ + drivers/s390/scsi/zfcp_fc.c | 13 +++---------- + 3 files changed, 19 insertions(+), 10 deletions(-) + +Index: linux-sles11/drivers/s390/scsi/zfcp_aux.c +=================================================================== +--- linux-sles11.orig/drivers/s390/scsi/zfcp_aux.c ++++ linux-sles11/drivers/s390/scsi/zfcp_aux.c +@@ -175,6 +175,11 @@ static int __init zfcp_module_init(void) + if (!zfcp_data.gid_pn_cache) + goto out_gid_cache; + ++ zfcp_data.gpn_ft_cache = zfcp_cache_create( ++ sizeof(struct ct_iu_gpn_ft_req), "zfcp_gpn"); ++ if (!zfcp_data.gpn_ft_cache) ++ goto out_gpn_cache; ++ + zfcp_data.work_queue = create_singlethread_workqueue("zfcp_wq"); + + INIT_LIST_HEAD(&zfcp_data.adapter_list_head); +@@ -209,6 +214,8 @@ out_ccw_register: + out_misc: + fc_release_transport(zfcp_data.scsi_transport_template); + out_transport: ++ kmem_cache_destroy(zfcp_data.gpn_ft_cache); ++out_gpn_cache: + kmem_cache_destroy(zfcp_data.gid_pn_cache); + out_gid_cache: + kmem_cache_destroy(zfcp_data.sr_buffer_cache); +Index: linux-sles11/drivers/s390/scsi/zfcp_def.h +=================================================================== +--- linux-sles11.orig/drivers/s390/scsi/zfcp_def.h ++++ linux-sles11/drivers/s390/scsi/zfcp_def.h +@@ -333,6 +333,14 @@ struct ct_iu_gid_pn_resp { + u32 d_id; + } __attribute__ ((packed)); + ++struct ct_iu_gpn_ft_req { ++ struct ct_hdr header; ++ u8 flags; ++ u8 domain_id_scope; ++ u8 area_id_scope; ++ u8 fc4_type; ++} __attribute__ ((packed)); ++ + /** + * struct zfcp_send_ct - used to pass parameters to function zfcp_fsf_send_ct + * @wka_port: port where the request is sent to +@@ -595,6 +603,7 @@ struct zfcp_data { + struct kmem_cache *fsf_req_qtcb_cache; + struct kmem_cache *sr_buffer_cache; + struct kmem_cache *gid_pn_cache; ++ struct kmem_cache *gpn_ft_cache; + struct workqueue_struct *work_queue; + }; + +Index: 
linux-sles11/drivers/s390/scsi/zfcp_fc.c +=================================================================== +--- linux-sles11.orig/drivers/s390/scsi/zfcp_fc.c ++++ linux-sles11/drivers/s390/scsi/zfcp_fc.c +@@ -10,14 +10,6 @@ + + #include "zfcp_ext.h" + +-struct ct_iu_gpn_ft_req { +- struct ct_hdr header; +- u8 flags; +- u8 domain_id_scope; +- u8 area_id_scope; +- u8 fc4_type; +-} __attribute__ ((packed)); +- + struct gpn_ft_resp_acc { + u8 control; + u8 port_id[3]; +@@ -450,7 +442,8 @@ static void zfcp_free_sg_env(struct zfcp + { + struct scatterlist *sg = &gpn_ft->sg_req; + +- kfree(sg_virt(sg)); /* free request buffer */ ++ /* free request buffer */ ++ kmem_cache_free(zfcp_data.gpn_ft_cache, sg_virt(sg)); + zfcp_sg_free_table(gpn_ft->sg_resp, buf_num); + + kfree(gpn_ft); +@@ -465,7 +458,7 @@ static struct zfcp_gpn_ft *zfcp_alloc_sg + if (!gpn_ft) + return NULL; + +- req = kzalloc(sizeof(struct ct_iu_gpn_ft_req), GFP_KERNEL); ++ req = kmem_cache_alloc(zfcp_data.gpn_ft_cache, GFP_KERNEL); + if (!req) { + kfree(gpn_ft); + gpn_ft = NULL; diff --git a/src/patches/suse-2.6.27.25/patches.arch/s390-08-03-iucv-cpu-hotremove.diff b/src/patches/suse-2.6.27.25/patches.arch/s390-08-03-iucv-cpu-hotremove.diff new file mode 100644 index 0000000000..98cbf4b912 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.arch/s390-08-03-iucv-cpu-hotremove.diff @@ -0,0 +1,83 @@ +From: Gerald Schaefer +Subject: iucv: failing cpu hot remove for inactive iucv +References: bnc#466462,LTC#51104 + +Symptom: cpu hot remove rejected with NOTIFY_BAD +Problem: If the iucv module is compiled in / loaded but no user + is registered, cpu hot remove doesn't work. The iucv + cpu hotplug notifier on CPU_DOWN_PREPARE checks, if + the iucv_buffer_cpumask would be empty after the + corresponding bit would be cleared. However the bit + was never set since iucv wasn't enabled. That causes + all cpu hot unplug operations to fail in this scenario. 
+Solution: Use iucv_path_table as an indicator whether iucv is + enabled or not. + +Acked-by: John Jolly +--- + + net/iucv/iucv.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +Index: linux-sles11/net/iucv/iucv.c +=================================================================== +--- linux-sles11.orig/net/iucv/iucv.c ++++ linux-sles11/net/iucv/iucv.c +@@ -516,6 +516,7 @@ static int iucv_enable(void) + size_t alloc_size; + int cpu, rc; + ++ get_online_cpus(); + rc = -ENOMEM; + alloc_size = iucv_max_pathid * sizeof(struct iucv_path); + iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); +@@ -523,19 +524,17 @@ static int iucv_enable(void) + goto out; + /* Declare per cpu buffers. */ + rc = -EIO; +- get_online_cpus(); + for_each_online_cpu(cpu) + smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); + if (cpus_empty(iucv_buffer_cpumask)) + /* No cpu could declare an iucv buffer. */ +- goto out_path; ++ goto out; + put_online_cpus(); + return 0; +- +-out_path: +- put_online_cpus(); +- kfree(iucv_path_table); + out: ++ kfree(iucv_path_table); ++ iucv_path_table = NULL; ++ put_online_cpus(); + return rc; + } + +@@ -550,8 +549,9 @@ static void iucv_disable(void) + { + get_online_cpus(); + on_each_cpu(iucv_retrieve_cpu, NULL, 1); +- put_online_cpus(); + kfree(iucv_path_table); ++ iucv_path_table = NULL; ++ put_online_cpus(); + } + + static int __cpuinit iucv_cpu_notify(struct notifier_block *self, +@@ -588,10 +588,14 @@ static int __cpuinit iucv_cpu_notify(str + case CPU_ONLINE_FROZEN: + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: ++ if (!iucv_path_table) ++ break; + smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: ++ if (!iucv_path_table) ++ break; + cpumask = iucv_buffer_cpumask; + cpu_clear(cpu, cpumask); + if (cpus_empty(cpumask)) diff --git a/src/patches/suse-2.6.27.25/patches.arch/s390-08-04-compat-sigaltstack.diff 
b/src/patches/suse-2.6.27.25/patches.arch/s390-08-04-compat-sigaltstack.diff new file mode 100644 index 0000000000..5f227a7270 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.arch/s390-08-04-compat-sigaltstack.diff @@ -0,0 +1,31 @@ +From: Gerald Schaefer +Subject: kernel: 31 bit compat sigaltstack syscall fails with -EFAULT. +References: bnc#466462,LTC#50888 + +Symptom: When 31 bit user space programs call sigaltstack on a 64 bit Linux + OS, the system call returns -1 with errno=EFAULT. +Problem: The 31 bit pointer passed to the system call is extended + to 64 bit, but the high order bits are not set to zero. + The kernel detects the invalid user space pointer and + returns -EFAULT. +Solution: Call sys32_sigaltstack_wrapper() instead of sys32_sigaltstack(). + The wrapper function sets the high order bits to zero. + +Acked-by: John Jolly +--- + arch/s390/kernel/syscalls.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +Index: linux-sles11/arch/s390/kernel/syscalls.S +=================================================================== +--- linux-sles11.orig/arch/s390/kernel/syscalls.S ++++ linux-sles11/arch/s390/kernel/syscalls.S +@@ -194,7 +194,7 @@ SYSCALL(sys_chown16,sys_ni_syscall,sys32 + SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper) + SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper) + SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */ +-SYSCALL(sys_sigaltstack,sys_sigaltstack,sys32_sigaltstack) ++SYSCALL(sys_sigaltstack,sys_sigaltstack,sys32_sigaltstack_wrapper) + SYSCALL(sys_sendfile,sys_sendfile64,sys32_sendfile_wrapper) + NI_SYSCALL /* streams1 */ + NI_SYSCALL /* streams2 */ diff --git a/src/patches/suse-2.6.27.25/patches.arch/s390-09-04-topology.diff b/src/patches/suse-2.6.27.25/patches.arch/s390-09-04-topology.diff new file mode 100644 index 0000000000..db7bc99e02 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.arch/s390-09-04-topology.diff @@ -0,0 +1,30 @@ +From: Gerald Schaefer +Subject: topology: introduce arch 
specific SD_MC_INIT initializer +References: bnc#477666,LTC#51049 + +Symptom: Up to 30% more cpu usage for some workloads. +Problem: For some workloads the extra multicore scheduling domain causes + additional cpu usage because of too optimistic assumptions when + it is ok to migrate processes from one cpu to another. The default + values for SD_MC_INIT don't work well on s390. +Solution: Define an architecure specific SD_MC_INIT scheduling domain + initializer which fixes the regression. + +Acked-by: John Jolly +--- + arch/s390/include/asm/topology.h | 2 ++ + 1 file changed, 2 insertions(+) + +Index: linux-sles11/arch/s390/include/asm/topology.h +=================================================================== +--- linux-sles11.orig/arch/s390/include/asm/topology.h ++++ linux-sles11/arch/s390/include/asm/topology.h +@@ -28,6 +28,8 @@ static inline void s390_init_cpu_topolog + }; + #endif + ++#define SD_MC_INIT SD_CPU_INIT ++ + #include + + #endif /* _ASM_S390_TOPOLOGY_H */ diff --git a/src/patches/suse-2.6.27.25/patches.drivers/fcoe-change-fcoe_sw-sg_tablesi.diff b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-change-fcoe_sw-sg_tablesi.diff new file mode 100644 index 0000000000..5f0653c672 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-change-fcoe_sw-sg_tablesi.diff @@ -0,0 +1,23 @@ +From: Yi Zou +Subject: [FcOE] change fcoe_sw sg_tablesize to SG_ALL +References: bnc #459142 + +Signed-off-by: Yi Zou +Acked-by: Bernhard Walle +--- + + drivers/scsi/fcoe/fcoe_sw.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + + +--- a/drivers/scsi/fcoe/fcoe_sw.c ++++ b/drivers/scsi/fcoe/fcoe_sw.c +@@ -100,7 +100,7 @@ static struct scsi_host_template fcoe_sw + .cmd_per_lun = 32, + .can_queue = FCOE_MAX_OUTSTANDING_COMMANDS, + .use_clustering = ENABLE_CLUSTERING, +- .sg_tablesize = 4, ++ .sg_tablesize = SG_ALL, + .max_sectors = 0xffff, + }; + diff --git a/src/patches/suse-2.6.27.25/patches.drivers/fcoe-check-return-for-fc_set_m.diff 
b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-check-return-for-fc_set_m.diff new file mode 100644 index 0000000000..afe73248eb --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-check-return-for-fc_set_m.diff @@ -0,0 +1,24 @@ +From: Yi Zou +Subject: [FcOE] check return for fc_set_mfs +References: bnc #459142 + +Signed-off-by: Yi Zou +Acked-by: Bernhard Walle +--- + + drivers/scsi/fcoe/fcoe_sw.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + + +--- a/drivers/scsi/fcoe/fcoe_sw.c ++++ b/drivers/scsi/fcoe/fcoe_sw.c +@@ -178,7 +178,8 @@ static int fcoe_sw_netdev_config(struct + */ + mfs = fc->real_dev->mtu - (sizeof(struct fcoe_hdr) + + sizeof(struct fcoe_crc_eof)); +- fc_set_mfs(lp, mfs); ++ if (fc_set_mfs(lp, mfs)) ++ return -EINVAL; + + lp->link_status = ~FC_PAUSE & ~FC_LINK_UP; + if (!fcoe_link_ok(lp)) diff --git a/src/patches/suse-2.6.27.25/patches.drivers/fcoe-fix-frame-length-validati.diff b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-fix-frame-length-validati.diff new file mode 100644 index 0000000000..f623dbab7a --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-fix-frame-length-validati.diff @@ -0,0 +1,149 @@ +From: Chris Leech +Subject: [FcOE] fix frame length validation in the early receive path +References: bnc #459142 + +Validation of the frame length was missing before accessing the FC and FCoE +headers. Some of the later checks were bogus, because of the way the fr_len +variable and skb->len were being manipulated they could never fail. 
+ +Signed-off-by: Chris Leech +Acked-by: Bernhard Walle +--- + + drivers/scsi/fcoe/libfcoe.c | 48 +++++++++++++++++++++----------------------- + include/scsi/fc/fc_fcoe.h | 12 +++++++++++ + include/scsi/fc_frame.h | 2 - + 3 files changed, 36 insertions(+), 26 deletions(-) + + +--- a/drivers/scsi/fcoe/libfcoe.c ++++ b/drivers/scsi/fcoe/libfcoe.c +@@ -184,7 +184,6 @@ int fcoe_rcv(struct sk_buff *skb, struct + struct fcoe_rcv_info *fr; + struct fcoe_softc *fc; + struct fcoe_dev_stats *stats; +- u8 *data; + struct fc_frame_header *fh; + unsigned short oxid; + int cpu_idx; +@@ -211,9 +210,18 @@ int fcoe_rcv(struct sk_buff *skb, struct + FC_DBG("wrong FC type frame"); + goto err; + } +- data = skb->data; +- data += sizeof(struct fcoe_hdr); +- fh = (struct fc_frame_header *)data; ++ ++ /* ++ * Check for minimum frame length, and make sure required FCoE ++ * and FC headers are pulled into the linear data area. ++ */ ++ if (unlikely((skb->len < FCOE_MIN_FRAME) || ++ !pskb_may_pull(skb, FCOE_HEADER_LEN))) ++ goto err; ++ ++ skb_set_transport_header(skb, sizeof(struct fcoe_hdr)); ++ fh = (struct fc_frame_header *) skb_transport_header(skb); ++ + oxid = ntohs(fh->fh_ox_id); + + fr = fcoe_dev_from_skb(skb); +@@ -514,8 +522,6 @@ int fcoe_percpu_receive_thread(void *arg + { + struct fcoe_percpu_s *p = arg; + u32 fr_len; +- unsigned int hlen; +- unsigned int tlen; + struct fc_lport *lp; + struct fcoe_rcv_info *fr; + struct fcoe_dev_stats *stats; +@@ -572,10 +578,12 @@ int fcoe_percpu_receive_thread(void *arg + skb_linearize(skb); /* not ideal */ + + /* +- * Check the header and pull it off. ++ * Frame length checks and setting up the header pointers ++ * was done in fcoe_rcv already. 
+ */ +- hlen = sizeof(struct fcoe_hdr); +- hp = (struct fcoe_hdr *)skb->data; ++ hp = (struct fcoe_hdr *) skb_network_header(skb); ++ fh = (struct fc_frame_header *) skb_transport_header(skb); ++ + if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { + if (stats) { + if (stats->ErrorFrames < 5) +@@ -586,22 +594,10 @@ int fcoe_percpu_receive_thread(void *arg + kfree_skb(skb); + continue; + } ++ + skb_pull(skb, sizeof(struct fcoe_hdr)); +- tlen = sizeof(struct fcoe_crc_eof); +- fr_len = skb->len - tlen; +- skb_trim(skb, fr_len); ++ fr_len = skb->len - sizeof(struct fcoe_crc_eof); + +- if (unlikely(fr_len > skb->len)) { +- if (stats) { +- if (stats->ErrorFrames < 5) +- FC_DBG("length error fr_len 0x%x " +- "skb->len 0x%x", fr_len, +- skb->len); +- stats->ErrorFrames++; +- } +- kfree_skb(skb); +- continue; +- } + if (stats) { + stats->RxFrames++; + stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; +@@ -610,9 +606,11 @@ int fcoe_percpu_receive_thread(void *arg + fp = (struct fc_frame *)skb; + cp = (struct fcoe_crc_eof *)(skb->data + fr_len); + fc_frame_init(fp); +- fr_eof(fp) = cp->fcoe_eof; +- fr_sof(fp) = hp->fcoe_sof; + fr_dev(fp) = lp; ++ fr_sof(fp) = hp->fcoe_sof; ++ fr_eof(fp) = cp->fcoe_eof; ++ /* trim off the CRC and EOF trailer*/ ++ skb_trim(skb, fr_len); + + /* + * We only check CRC if no offload is available and if it is +--- a/include/scsi/fc/fc_fcoe.h ++++ b/include/scsi/fc/fc_fcoe.h +@@ -85,6 +85,18 @@ struct fcoe_crc_eof { + } __attribute__((packed)); + + /* ++ * Minimum FCoE + FC header length ++ * 14 bytes FCoE header + 24 byte FC header = 38 bytes ++ */ ++#define FCOE_HEADER_LEN 38 ++ ++/* ++ * Minimum FCoE frame size ++ * 14 bytes FCoE header + 24 byte FC header + 8 byte FCoE trailer = 46 bytes ++ */ ++#define FCOE_MIN_FRAME 46 ++ ++/* + * fc_fcoe_set_mac - Store OUI + DID into MAC address field. 
+ * @mac: mac address to be set + * @did: fc dest id to use +--- a/include/scsi/fc_frame.h ++++ b/include/scsi/fc_frame.h +@@ -66,10 +66,10 @@ struct fcoe_rcv_info { + struct fc_lport *fr_dev; /* transport layer private pointer */ + struct fc_seq *fr_seq; /* for use with exchange manager */ + struct scsi_cmnd *fr_cmd; /* for use of scsi command */ ++ u16 fr_max_payload; /* max FC payload */ + enum fc_sof fr_sof; /* start of frame delimiter */ + enum fc_eof fr_eof; /* end of frame delimiter */ + u8 fr_flags; /* flags - see below */ +- u16 fr_max_payload; /* max FC payload */ + }; + + /* diff --git a/src/patches/suse-2.6.27.25/patches.drivers/fcoe-fix-incorrect-use-of-struct-module.diff b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-fix-incorrect-use-of-struct-module.diff new file mode 100644 index 0000000000..101eaa7d01 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-fix-incorrect-use-of-struct-module.diff @@ -0,0 +1,51 @@ +From: James Bottomley +Subject: fcoe: fix incorrect use of struct module +Patch-mainline: 9296e519538b77b5070d49f2f9d66032733c76d4 +References: bnc #468051 + +This structure may not be defined if CONFIG_MODULE=n, so never deref it. Change +uses of module->name to module_name(module) and corrects some dyslexic printks +and docbook comments. 
+ +Reported-by: Randy Dunlap +Cc: Robert Love +Signed-off-by: James Bottomley +Acked-by: Bernhard Walle + +--- + drivers/scsi/fcoe/libfcoe.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/scsi/fcoe/libfcoe.c ++++ b/drivers/scsi/fcoe/libfcoe.c +@@ -167,7 +167,7 @@ static int fcoe_cpu_callback(struct noti + #endif /* CONFIG_HOTPLUG_CPU */ + + /** +- * foce_rcv - this is the fcoe receive function called by NET_RX_SOFTIRQ ++ * fcoe_rcv - this is the fcoe receive function called by NET_RX_SOFTIRQ + * @skb: the receive skb + * @dev: associated net device + * @ptype: context +@@ -992,8 +992,8 @@ static int fcoe_ethdrv_get(const struct + + owner = fcoe_netdev_to_module_owner(netdev); + if (owner) { +- printk(KERN_DEBUG "foce:hold driver module %s for %s\n", +- owner->name, netdev->name); ++ printk(KERN_DEBUG "fcoe:hold driver module %s for %s\n", ++ module_name(owner), netdev->name); + return try_module_get(owner); + } + return -ENODEV; +@@ -1012,8 +1012,8 @@ static int fcoe_ethdrv_put(const struct + + owner = fcoe_netdev_to_module_owner(netdev); + if (owner) { +- printk(KERN_DEBUG "foce:release driver module %s for %s\n", +- owner->name, netdev->name); ++ printk(KERN_DEBUG "fcoe:release driver module %s for %s\n", ++ module_name(owner), netdev->name); + module_put(owner); + return 0; + } diff --git a/src/patches/suse-2.6.27.25/patches.drivers/fcoe-improved-load-balancing-i.diff b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-improved-load-balancing-i.diff new file mode 100644 index 0000000000..9bef62111d --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-improved-load-balancing-i.diff @@ -0,0 +1,43 @@ +From: Vasu Dev +Subject: [FcOE] improved load balancing in rx path +References: bnc #459142 + +Currently incoming frame exchange id ANDing with total number of bits +in online CPU bits mask, resulted only at most two CPUs selection in +rx path, so instead used online CPU bits mask to direct incoming frame +to a all cpus 
for better load balancing. + +Added code to default to first CPU in case selected CPU is offline or +its rx thread not present. + +Signed-off-by: Vasu Dev +Acked-by: Bernhard Walle +--- + + drivers/scsi/fcoe/libfcoe.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + + +--- a/drivers/scsi/fcoe/libfcoe.c ++++ b/drivers/scsi/fcoe/libfcoe.c +@@ -230,13 +230,14 @@ int fcoe_rcv(struct sk_buff *skb, struct + cpu_idx = 0; + #ifdef CONFIG_SMP + /* +- * The exchange ID are ANDed with num of online CPUs, +- * so that will have the least lock contention in +- * handling the exchange. if there is no thread +- * for a given idx then use first online cpu. ++ * The incoming frame exchange id(oxid) is ANDed with num of online ++ * cpu bits to get cpu_idx and then this cpu_idx is used for selecting ++ * a per cpu kernel thread from fcoe_percpu. In case the cpu is ++ * offline or no kernel thread for derived cpu_idx then cpu_idx is ++ * initialize to first online cpu index. + */ +- cpu_idx = oxid & (num_online_cpus() >> 1); +- if (fcoe_percpu[cpu_idx] == NULL) ++ cpu_idx = oxid & (num_online_cpus() - 1); ++ if (!fcoe_percpu[cpu_idx] || !cpu_online(cpu_idx)) + cpu_idx = first_cpu(cpu_online_map); + #endif + fps = fcoe_percpu[cpu_idx]; diff --git a/src/patches/suse-2.6.27.25/patches.drivers/fcoe-logoff-of-the-fabric-when.diff b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-logoff-of-the-fabric-when.diff new file mode 100644 index 0000000000..a1e32718c7 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-logoff-of-the-fabric-when.diff @@ -0,0 +1,26 @@ +From: Robert Love +Subject: [FcOE] Logoff of the fabric when destroying interface +References: bnc #459142 + +This line was accidentally removed by a previous patch. 
+ +Signed-off-by: Robert Love +Acked-by: Bernhard Walle +--- + + drivers/scsi/fcoe/fcoe_sw.c | 3 +++ + 1 file changed, 3 insertions(+) + + +--- a/drivers/scsi/fcoe/fcoe_sw.c ++++ b/drivers/scsi/fcoe/fcoe_sw.c +@@ -302,6 +302,9 @@ static int fcoe_sw_destroy(struct net_de + + fc = fcoe_softc(lp); + ++ /* Logout of the fabric */ ++ fc_fabric_logoff(lp); ++ + /* Remove the instance from fcoe's list */ + fcoe_hostlist_remove(lp); + diff --git a/src/patches/suse-2.6.27.25/patches.drivers/fcoe-remove-warn_on-in-fc_set.diff b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-remove-warn_on-in-fc_set.diff new file mode 100644 index 0000000000..76a470aaac --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-remove-warn_on-in-fc_set.diff @@ -0,0 +1,46 @@ +From: Yi Zou +Subject: [FcOE] remove WARN_ON in fc_set_mfs +References: bnc #459142 + +remove WARN_ON in fc_set_mfs(), also adde comments. + +Signed-off-by: Yi Zou +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_lport.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + + +--- a/drivers/scsi/libfc/fc_lport.c ++++ b/drivers/scsi/libfc/fc_lport.c +@@ -656,10 +656,20 @@ int fc_lport_destroy(struct fc_lport *lp + } + EXPORT_SYMBOL(fc_lport_destroy); + ++/** ++ * fc_set_mfs - sets up the mfs for the corresponding fc_lport ++ * @lport: fc_lport pointer to unregister ++ * @mfs: the new mfs for fc_lport ++ * ++ * Set mfs for the given fc_lport to the new mfs. 
++ * ++ * Return: 0 for success ++ * ++ **/ + int fc_set_mfs(struct fc_lport *lport, u32 mfs) + { + unsigned int old_mfs; +- int rc = -1; ++ int rc = -EINVAL; + + mutex_lock(&lport->lp_mutex); + +@@ -667,7 +677,6 @@ int fc_set_mfs(struct fc_lport *lport, u + + if (mfs >= FC_MIN_MAX_FRAME) { + mfs &= ~3; +- WARN_ON((size_t) mfs < FC_MIN_MAX_FRAME); + if (mfs > FC_MAX_FRAME) + mfs = FC_MAX_FRAME; + mfs -= sizeof(struct fc_frame_header); diff --git a/src/patches/suse-2.6.27.25/patches.drivers/fcoe-user_mfs-is-never-used.diff b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-user_mfs-is-never-used.diff new file mode 100644 index 0000000000..6247ddebd8 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/fcoe-user_mfs-is-never-used.diff @@ -0,0 +1,34 @@ +From: Yi Zou +Subject: [FcOE] user_mfs is never used +References: bnc #459142 + +Signed-off-by: Yi Zou +Acked-by: Bernhard Walle +--- + + drivers/scsi/fcoe/libfcoe.c | 2 -- + include/scsi/libfcoe.h | 1 - + 2 files changed, 3 deletions(-) + + +--- a/drivers/scsi/fcoe/libfcoe.c ++++ b/drivers/scsi/fcoe/libfcoe.c +@@ -900,8 +900,6 @@ static int fcoe_device_notification(stru + mfs = fc->real_dev->mtu - + (sizeof(struct fcoe_hdr) + + sizeof(struct fcoe_crc_eof)); +- if (fc->user_mfs && fc->user_mfs < mfs) +- mfs = fc->user_mfs; + if (mfs >= FC_MIN_MAX_FRAME) + fc_set_mfs(lp, mfs); + new_status &= ~FC_LINK_UP; +--- a/include/scsi/libfcoe.h ++++ b/include/scsi/libfcoe.h +@@ -46,7 +46,6 @@ struct fcoe_softc { + struct net_device *phys_dev; /* device with ethtool_ops */ + struct packet_type fcoe_packet_type; + struct sk_buff_head fcoe_pending_queue; +- u16 user_mfs; /* configured max frame size */ + + u8 dest_addr[ETH_ALEN]; + u8 ctl_src_addr[ETH_ALEN]; diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-add-fc_disc-c-locking-co.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-add-fc_disc-c-locking-co.diff new file mode 100644 index 0000000000..5da8021bc8 --- /dev/null +++ 
b/src/patches/suse-2.6.27.25/patches.drivers/libfc-add-fc_disc-c-locking-co.diff @@ -0,0 +1,29 @@ +From: Robert Love +Subject: [FcOE] Add fc_disc.c locking comment block +References: bnc #459142 + +Signed-off-by: Robert Love +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_disc.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + + +--- a/drivers/scsi/libfc/fc_disc.c ++++ b/drivers/scsi/libfc/fc_disc.c +@@ -24,6 +24,14 @@ + * also handles RSCN events and re-discovery if necessary. + */ + ++/* ++ * DISC LOCKING ++ * ++ * The disc mutex is can be locked when acquiring rport locks, but may not ++ * be held when acquiring the lport lock. Refer to fc_lport.c for more ++ * details. ++ */ ++ + #include + #include + #include diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-check-for-err-when-recv-state-is-incorrect.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-check-for-err-when-recv-state-is-incorrect.diff new file mode 100644 index 0000000000..9a2b00fb2c --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-check-for-err-when-recv-state-is-incorrect.diff @@ -0,0 +1,218 @@ +From 251b8184b1bd4e17656d72ba9cffcba733092064 Mon Sep 17 00:00:00 2001 +From: Robert Love +Date: Mon, 2 Feb 2009 10:13:06 -0800 +Subject: [PATCH] libfc: check for err when recv and state is incorrect +References: bnc#473602 + +If we've just created an interface and the an rport is +logging in we may have a request on the wire (say PRLI). +If we destroy the interface, we'll go through each rport +on the disc->rports list and set each rport's state to NONE. +Then the lport will reset the EM. The EM reset will send a +CLOSED event to the prli_resp() handler which will notice +that the state != PRLI. In this case it frees the frame +pointer, decrements the refcount and unlocks the rport. + +The problem is that there isn't a frame in this case. It's +just a pointer with an embedded error code. The free causes +an Oops. 
+ +This patch moves the error checking to be before the state +checking. + +Signed-off-by: Robert Love +Signed-off-by: Hannes Reinecke + +--- + drivers/scsi/libfc/fc_lport.c | 50 +++++++++++++++++++++--------------------- + drivers/scsi/libfc/fc_rport.c | 30 ++++++++++++------------- + 2 files changed, 40 insertions(+), 40 deletions(-) + +--- a/drivers/scsi/libfc/fc_lport.c ++++ b/drivers/scsi/libfc/fc_lport.c +@@ -1031,17 +1031,17 @@ static void fc_lport_rft_id_resp(struct + + FC_DEBUG_LPORT("Received a RFT_ID response\n"); + ++ if (IS_ERR(fp)) { ++ fc_lport_error(lport, fp); ++ goto err; ++ } ++ + if (lport->state != LPORT_ST_RFT_ID) { + FC_DBG("Received a RFT_ID response, but in state %s\n", + fc_lport_state(lport)); + goto out; + } + +- if (IS_ERR(fp)) { +- fc_lport_error(lport, fp); +- goto err; +- } +- + fh = fc_frame_header_get(fp); + ct = fc_frame_payload_get(fp, sizeof(*ct)); + +@@ -1083,17 +1083,17 @@ static void fc_lport_rpn_id_resp(struct + + FC_DEBUG_LPORT("Received a RPN_ID response\n"); + ++ if (IS_ERR(fp)) { ++ fc_lport_error(lport, fp); ++ goto err; ++ } ++ + if (lport->state != LPORT_ST_RPN_ID) { + FC_DBG("Received a RPN_ID response, but in state %s\n", + fc_lport_state(lport)); + goto out; + } + +- if (IS_ERR(fp)) { +- fc_lport_error(lport, fp); +- goto err; +- } +- + fh = fc_frame_header_get(fp); + ct = fc_frame_payload_get(fp, sizeof(*ct)); + if (fh && ct && fh->fh_type == FC_TYPE_CT && +@@ -1133,17 +1133,17 @@ static void fc_lport_scr_resp(struct fc_ + + FC_DEBUG_LPORT("Received a SCR response\n"); + ++ if (IS_ERR(fp)) { ++ fc_lport_error(lport, fp); ++ goto err; ++ } ++ + if (lport->state != LPORT_ST_SCR) { + FC_DBG("Received a SCR response, but in state %s\n", + fc_lport_state(lport)); + goto out; + } + +- if (IS_ERR(fp)) { +- fc_lport_error(lport, fp); +- goto err; +- } +- + op = fc_frame_payload_op(fp); + if (op == ELS_LS_ACC) + fc_lport_enter_ready(lport); +@@ -1359,17 +1359,17 @@ static void fc_lport_logo_resp(struct fc + + 
FC_DEBUG_LPORT("Received a LOGO response\n"); + ++ if (IS_ERR(fp)) { ++ fc_lport_error(lport, fp); ++ goto err; ++ } ++ + if (lport->state != LPORT_ST_LOGO) { + FC_DBG("Received a LOGO response, but in state %s\n", + fc_lport_state(lport)); + goto out; + } + +- if (IS_ERR(fp)) { +- fc_lport_error(lport, fp); +- goto err; +- } +- + op = fc_frame_payload_op(fp); + if (op == ELS_LS_ACC) + fc_lport_enter_reset(lport); +@@ -1443,17 +1443,17 @@ static void fc_lport_flogi_resp(struct f + + FC_DEBUG_LPORT("Received a FLOGI response\n"); + ++ if (IS_ERR(fp)) { ++ fc_lport_error(lport, fp); ++ goto err; ++ } ++ + if (lport->state != LPORT_ST_FLOGI) { + FC_DBG("Received a FLOGI response, but in state %s\n", + fc_lport_state(lport)); + goto out; + } + +- if (IS_ERR(fp)) { +- fc_lport_error(lport, fp); +- goto err; +- } +- + fh = fc_frame_header_get(fp); + did = ntoh24(fh->fh_d_id); + if (fc_frame_payload_op(fp) == ELS_LS_ACC && did != 0) { +--- a/drivers/scsi/libfc/fc_rport.c ++++ b/drivers/scsi/libfc/fc_rport.c +@@ -505,17 +505,17 @@ static void fc_rport_plogi_resp(struct f + FC_DEBUG_RPORT("Received a PLOGI response from port (%6x)\n", + rport->port_id); + ++ if (IS_ERR(fp)) { ++ fc_rport_error_retry(rport, fp); ++ goto err; ++ } ++ + if (rdata->rp_state != RPORT_ST_PLOGI) { + FC_DBG("Received a PLOGI response, but in state %s\n", + fc_rport_state(rport)); + goto out; + } + +- if (IS_ERR(fp)) { +- fc_rport_error_retry(rport, fp); +- goto err; +- } +- + op = fc_frame_payload_op(fp); + if (op == ELS_LS_ACC && + (plp = fc_frame_payload_get(fp, sizeof(*plp))) != NULL) { +@@ -614,17 +614,17 @@ static void fc_rport_prli_resp(struct fc + FC_DEBUG_RPORT("Received a PRLI response from port (%6x)\n", + rport->port_id); + ++ if (IS_ERR(fp)) { ++ fc_rport_error_retry(rport, fp); ++ goto err; ++ } ++ + if (rdata->rp_state != RPORT_ST_PRLI) { + FC_DBG("Received a PRLI response, but in state %s\n", + fc_rport_state(rport)); + goto out; + } + +- if (IS_ERR(fp)) { +- 
fc_rport_error_retry(rport, fp); +- goto err; +- } +- + op = fc_frame_payload_op(fp); + if (op == ELS_LS_ACC) { + pp = fc_frame_payload_get(fp, sizeof(*pp)); +@@ -764,17 +764,17 @@ static void fc_rport_rtv_resp(struct fc_ + FC_DEBUG_RPORT("Received a RTV response from port (%6x)\n", + rport->port_id); + ++ if (IS_ERR(fp)) { ++ fc_rport_error(rport, fp); ++ goto err; ++ } ++ + if (rdata->rp_state != RPORT_ST_RTV) { + FC_DBG("Received a RTV response, but in state %s\n", + fc_rport_state(rport)); + goto out; + } + +- if (IS_ERR(fp)) { +- fc_rport_error(rport, fp); +- goto err; +- } +- + op = fc_frame_payload_op(fp); + if (op == ELS_LS_ACC) { + struct fc_els_rtv_acc *rtv; diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-ensure-correct-device_pu.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-ensure-correct-device_pu.diff new file mode 100644 index 0000000000..ad60ec20fc --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-ensure-correct-device_pu.diff @@ -0,0 +1,214 @@ +From: Robert Love +Subject: libfc: Ensure correct device_put/get usage (round 2) +References: + +Reference counting was barely used and where used +it was incorrect. This patch creates a few simple +policies. + +When the rport->dev [e.g. struct device] is initialized +it starts with a refcnt of 1. Whenever we're using the +rport we will increment the count. When we logoff we +should decrement the count to 0 and the 'release' +function will be called. The FC transport provides the +release function for real rports and libfc provides it +for rogue rports. When we switch from a rogue to real +rport we'll decrement the refcnt on the rogue rport +and increment it for the real rport, after we've created +it. + +Any externally initiated action on an rport (login, +logoff) will not require the caller to increment and +decrement the refcnt. + +For rport_login(), the rport will have just been created +and therefore no other thread would be able to access +this object. 
+ +For rport_logoff(), the rport will have been removed +from the list of rports and therefore no other thread +would be able to lookup() this rport. + +This patch removes the get_device() from the rport_lookup +function. These are the places where it is called and why +we don't need a reference. + +fc_disc_recv_rscn_req() - called for single port RSCNs + the disc mutex is held and + ensures that no other thread + will find this rport. + +fc_disc_new_target() - Same. The rport cannot be looked up + so no other thread can free the rport. + This code looks buggy though, we + shouldn't be calling rport_login() on + a 'real' rport, which we could do. + +fc_disc_single() - Same. disc mutex protects the list. + +fc_lport_recv_req() - Similar, but this time the lport lock + ensures that no incoming requests are + processed until the current request + for an rport has returned. + +When the rport layer needs to send a request it will +increment the count so that the EM can be confident that +the rport is present when making the callback. If +fc_remote_port_delete() is called before the response +callback, which is often the case for LOGO commands, the +refcnt will still have a value of 1 becuase we grabbed the +lock before the ctels_send() is called. The exchange would +have been removed and so the callback will be called with +an error code. After processing the error code we'll +decrement the refcnt for the last time and the rport will +be free'd. + +Since point-to-point mode is not working this patch +does not consider point-to-point. 
+ +Signed-off-by: Robert Love +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_disc.c | 5 +---- + drivers/scsi/libfc/fc_lport.c | 5 ++--- + drivers/scsi/libfc/fc_rport.c | 21 ++++++++++++++------- + 3 files changed, 17 insertions(+), 14 deletions(-) + + +--- a/drivers/scsi/libfc/fc_disc.c ++++ b/drivers/scsi/libfc/fc_disc.c +@@ -81,7 +81,6 @@ struct fc_rport *fc_disc_lookup_rport(co + if (rport->port_id == port_id) { + disc_found = 1; + found = rport; +- get_device(&found->dev); + break; + } + } +@@ -767,10 +766,8 @@ static void fc_disc_single(struct fc_dis + goto out; + + rport = lport->tt.rport_lookup(lport, dp->ids.port_id); +- if (rport) { ++ if (rport) + fc_disc_del_target(disc, rport); +- put_device(&rport->dev); /* hold from lookup */ +- } + + new_rport = fc_rport_rogue_create(dp); + if (new_rport) { +--- a/drivers/scsi/libfc/fc_lport.c ++++ b/drivers/scsi/libfc/fc_lport.c +@@ -908,10 +908,9 @@ static void fc_lport_recv_req(struct fc_ + d_id = ntoh24(fh->fh_d_id); + + rport = lport->tt.rport_lookup(lport, s_id); +- if (rport) { ++ if (rport) + lport->tt.rport_recv_req(sp, fp, rport); +- put_device(&rport->dev); /* hold from lookup */ +- } else { ++ else { + rjt_data.fp = NULL; + rjt_data.reason = ELS_RJT_UNAB; + rjt_data.explan = ELS_EXPL_NONE; +--- a/drivers/scsi/libfc/fc_rport.c ++++ b/drivers/scsi/libfc/fc_rport.c +@@ -111,16 +111,11 @@ struct fc_rport *fc_rport_rogue_create(s + rport->roles = dp->ids.roles; + rport->maxframe_size = FC_MIN_MAX_PAYLOAD; + /* +- * init the device, so other code can manipulate the rport as if +- * it came from the fc class. We also do an extra get because +- * libfc will free this rport instead of relying on the normal +- * refcounting. +- * + * Note: all this libfc rogue rport code will be removed for + * upstream so it fine that this is really ugly and hacky right now. 
+ */ + device_initialize(&rport->dev); +- get_device(&rport->dev); ++ rport->dev.release = fc_rport_rogue_destroy; // XXX: bwalle + + mutex_init(&rdata->rp_mutex); + rdata->local_port = dp->lp; +@@ -402,9 +397,9 @@ static void fc_rport_timeout(struct work + case RPORT_ST_NONE: + break; + } +- put_device(&rport->dev); + + mutex_unlock(&rdata->rp_mutex); ++ put_device(&rport->dev); + } + + /** +@@ -531,6 +526,7 @@ out: + fc_frame_free(fp); + err: + mutex_unlock(&rdata->rp_mutex); ++ put_device(&rport->dev); + } + + /** +@@ -562,6 +558,8 @@ static void fc_rport_enter_plogi(struct + if (!lport->tt.elsct_send(lport, rport, fp, ELS_PLOGI, + fc_rport_plogi_resp, rport, lport->e_d_tov)) + fc_rport_error(rport, fp); ++ else ++ get_device(&rport->dev); + } + + /** +@@ -631,6 +629,7 @@ out: + fc_frame_free(fp); + err: + mutex_unlock(&rdata->rp_mutex); ++ put_device(&rport->dev); + } + + /** +@@ -679,6 +678,7 @@ out: + fc_frame_free(fp); + err: + mutex_unlock(&rdata->rp_mutex); ++ put_device(&rport->dev); + } + + /** +@@ -712,6 +712,8 @@ static void fc_rport_enter_prli(struct f + if (!lport->tt.elsct_send(lport, rport, fp, ELS_PRLI, + fc_rport_prli_resp, rport, lport->e_d_tov)) + fc_rport_error(rport, fp); ++ else ++ get_device(&rport->dev); + } + + /** +@@ -777,6 +779,7 @@ out: + fc_frame_free(fp); + err: + mutex_unlock(&rdata->rp_mutex); ++ put_device(&rport->dev); + } + + /** +@@ -806,6 +809,8 @@ static void fc_rport_enter_rtv(struct fc + if (!lport->tt.elsct_send(lport, rport, fp, ELS_RTV, + fc_rport_rtv_resp, rport, lport->e_d_tov)) + fc_rport_error(rport, fp); ++ else ++ get_device(&rport->dev); + } + + /** +@@ -835,6 +840,8 @@ static void fc_rport_enter_logo(struct f + if (!lport->tt.elsct_send(lport, rport, fp, ELS_LOGO, + fc_rport_logo_resp, rport, lport->e_d_tov)) + fc_rport_error(rport, fp); ++ else ++ get_device(&rport->dev); + } + + diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-handle-rrq-exch-timeout.diff 
b/src/patches/suse-2.6.27.25/patches.drivers/libfc-handle-rrq-exch-timeout.diff new file mode 100644 index 0000000000..fbc2cc2e5a --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-handle-rrq-exch-timeout.diff @@ -0,0 +1,27 @@ +From: Vasu Dev +Subject: libfc: handle RRQ exch timeout +References: bnc #465596 + +Cleanup exchange held due to RRQ when RRQ exch times out, in this case the +ABTS is already done causing RRQ req therefore proceeding with cleanup in +fc_exch_rrq_resp should be okay to restore exch resource. + +Signed-off-by: Vasu Dev +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_exch.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + + +--- a/drivers/scsi/libfc/fc_exch.c ++++ b/drivers/scsi/libfc/fc_exch.c +@@ -1605,7 +1605,7 @@ static void fc_exch_rrq_resp(struct fc_s + if (IS_ERR(fp)) { + int err = PTR_ERR(fp); + +- if (err == -FC_EX_CLOSED) ++ if (err == -FC_EX_CLOSED || err == -FC_EX_TIMEOUT) + goto cleanup; + FC_DBG("Cannot process RRQ, because of frame error %d\n", err); + return; diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-improve-fc_lport-c-locki.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-improve-fc_lport-c-locki.diff new file mode 100644 index 0000000000..89f885df74 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-improve-fc_lport-c-locki.diff @@ -0,0 +1,114 @@ +From: Robert Love +Subject: [FcOE] Improve fc_lport.c locking comment block +References: bnc #459142 + +Signed-off-by: Robert Love +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_lport.c | 76 ++++++++++++++++++++++++------------------ + 1 file changed, 45 insertions(+), 31 deletions(-) + + +--- a/drivers/scsi/libfc/fc_lport.c ++++ b/drivers/scsi/libfc/fc_lport.c +@@ -18,34 +18,51 @@ + */ + + /* +- * General locking notes: ++ * PORT LOCKING NOTES + * +- * The lport and rport blocks both have mutexes that are used to protect +- * the port objects states. 
The main motivation for this protection is that +- * we don't want to be preparing a request/response in one context while +- * another thread "resets" the port in question. For example, if the lport +- * block is sending a SCR request to the directory server we don't want +- * the lport to be reset before we fill out the frame header's port_id. The +- * problem is that a reset would cause the lport's port_id to reset to 0. +- * If we don't protect the lport we'd spew incorrect frames. +- * +- * At the time of this writing there are two primary mutexes, one for the +- * lport and one for the rport. Since the lport uses the rport and makes +- * calls into that block the rport should never make calls that would cause +- * the lport's mutex to be locked. In other words, the lport's mutex is +- * considered the outer lock and the rport's lock is considered the inner +- * lock. The bottom line is that you can hold a lport's mutex and then +- * hold the rport's mutex, but not the other way around. +- * +- * The only complication to this rule is the callbacks from the rport to +- * the lport's rport_callback function. When rports become READY they make +- * a callback to the lport so that it can track them. In the case of the +- * directory server that callback might cause the lport to change its +- * state, implying that the lport mutex would need to be held. This problem +- * was solved by serializing the rport notifications to the lport and the +- * callback is made without holding the rport's lock. ++ * These comments only apply to the 'port code' which consists of the lport, ++ * disc and rport blocks. + * +- * lport locking notes: ++ * MOTIVATION ++ * ++ * The lport, disc and rport blocks all have mutexes that are used to protect ++ * those objects. The main motivation for these locks is to prevent from ++ * having an lport reset just before we send a frame. 
In that scenario the ++ * lport's FID would get set to zero and then we'd send a frame with an ++ * invalid SID. We also need to ensure that states don't change unexpectedly ++ * while processing another state. ++ * ++ * HEIRARCHY ++ * ++ * The following heirarchy defines the locking rules. A greater lock ++ * may be held before acquiring a lesser lock, but a lesser lock should never ++ * be held while attempting to acquire a greater lock. Here is the heirarchy- ++ * ++ * lport > disc, lport > rport, disc > rport ++ * ++ * CALLBACKS ++ * ++ * The callbacks cause complications with this scheme. There is a callback ++ * from the rport (to either lport or disc) and a callback from disc ++ * (to the lport). ++ * ++ * As rports exit the rport state machine a callback is made to the owner of ++ * the rport to notify success or failure. Since the callback is likely to ++ * cause the lport or disc to grab its lock we cannot hold the rport lock ++ * while making the callback. To ensure that the rport is not free'd while ++ * processing the callback the rport callbacks are serialized through a ++ * single-threaded workqueue. An rport would never be free'd while in a ++ * callback handler becuase no other rport work in this queue can be executed ++ * at the same time. ++ * ++ * When discovery succeeds or fails a callback is made to the lport as ++ * notification. Currently, succesful discovery causes the lport to take no ++ * action. A failure will cause the lport to reset. There is likely a circular ++ * locking problem with this implementation. ++ */ ++ ++/* ++ * LPORT LOCKING + * + * The critical sections protected by the lport's mutex are quite broad and + * may be improved upon in the future. The lport code and its locking doesn't +@@ -54,9 +71,9 @@ + * + * The strategy is to lock whenever processing a request or response. Note + * that every _enter_* function corresponds to a state change. 
They generally +- * change the lports state and then sends a request out on the wire. We lock ++ * change the lports state and then send a request out on the wire. We lock + * before calling any of these functions to protect that state change. This +- * means that the entry points into the lport block to manage the locks while ++ * means that the entry points into the lport block manage the locks while + * the state machine can transition between states (i.e. _enter_* functions) + * while always staying protected. + * +@@ -68,9 +85,6 @@ + * Retries also have to consider the locking. The retries occur from a work + * context and the work function will lock the lport and then retry the state + * (i.e. _enter_* function). +- * +- * The implication to all of this is that each lport can only process one +- * state at a time. + */ + + #include diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-improve-fc_rport-c-locki.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-improve-fc_rport-c-locki.diff new file mode 100644 index 0000000000..755a646ff8 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-improve-fc_rport-c-locki.diff @@ -0,0 +1,50 @@ +From: Robert Love +Subject: Improve fc_rport.c locking comment block +References: 459142 + +checkpatch.pl was complaining about having spaces +after '*'s. It seemed to be a false positive. I split +the comment block into two blocks and it resolved the +ERROR. + +Signed-off-by: Robert Love +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_rport.c | 17 ++++++++++------- + 1 file changed, 10 insertions(+), 7 deletions(-) + + +--- a/drivers/scsi/libfc/fc_rport.c ++++ b/drivers/scsi/libfc/fc_rport.c +@@ -18,20 +18,23 @@ + */ + + /* ++ * RPORT GENERAL INFO ++ * + * This file contains all processing regarding fc_rports. It contains the + * rport state machine and does all rport interaction with the transport class. 
+ * There should be no other places in libfc that interact directly with the + * transport class in regards to adding and deleting rports. + * + * fc_rport's represent N_Port's within the fabric. ++ */ ++ ++/* ++ * RPORT LOCKING + * +- * rport locking notes: +- * +- * The rport should never hold the rport mutex and then lock the lport +- * mutex. The rport's mutex is considered lesser than the lport's mutex, so +- * the lport mutex can be held before locking the rport mutex, but not the +- * other way around. See the comment block at the top of fc_lport.c for more +- * details. ++ * The rport should never hold the rport mutex and then attempt to acquire ++ * either the lport or disc mutexes. The rport's mutex is considered lesser ++ * than both the lport's mutex and the disc mutex. Refer to fc_lport.c for ++ * more comments on the heirarchy. + * + * The locking strategy is similar to the lport's strategy. The lock protects + * the rport's states and is held and released by the entry points to the rport diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-make-fc_disc-inline-with.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-make-fc_disc-inline-with.diff new file mode 100644 index 0000000000..a6a7e2a2c1 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-make-fc_disc-inline-with.diff @@ -0,0 +1,217 @@ +From: Chris Leech +Subject: [FcOE] make fc_disc inline with the fc_lport structure +References: bnc #459142 + +The extra memory allocation we're not being checked for failure. Rather than +further complicating things, just make the discovery code required fields be +part of the lport structure. 
+ +Signed-off-by: Chris Leech +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_disc.c | 80 +++++++----------------------------------- + drivers/scsi/libfc/fc_lport.c | 2 - + include/scsi/libfc.h | 22 ++++++++++- + 3 files changed, 35 insertions(+), 69 deletions(-) + + +--- a/drivers/scsi/libfc/fc_disc.c ++++ b/drivers/scsi/libfc/fc_disc.c +@@ -45,26 +45,6 @@ static int fc_disc_debug; + FC_DBG(fmt); \ + } while (0) + +-struct fc_disc { +- unsigned char retry_count; +- unsigned char delay; +- unsigned char pending; +- unsigned char requested; +- unsigned short seq_count; +- unsigned char buf_len; +- enum fc_disc_event event; +- +- void (*disc_callback)(struct fc_lport *, +- enum fc_disc_event); +- +- struct list_head rports; +- struct fc_lport *lport; +- struct mutex disc_mutex; +- struct fc_gpn_ft_resp partial_buf; /* partial name buffer */ +- struct delayed_work disc_work; +- +-}; +- + static void fc_disc_gpn_ft_req(struct fc_disc *); + static void fc_disc_gpn_ft_resp(struct fc_seq *, struct fc_frame *, void *); + static int fc_disc_new_target(struct fc_disc *, struct fc_rport *, +@@ -83,14 +63,11 @@ static void fc_disc_restart(struct fc_di + struct fc_rport *fc_disc_lookup_rport(const struct fc_lport *lport, + u32 port_id) + { +- struct fc_disc *disc = lport->disc; ++ const struct fc_disc *disc = &lport->disc; + struct fc_rport *rport, *found = NULL; + struct fc_rport_libfc_priv *rdata; + int disc_found = 0; + +- if (!disc) +- return NULL; +- + list_for_each_entry(rdata, &disc->rports, peers) { + rport = PRIV_TO_RPORT(rdata); + if (rport->port_id == port_id) { +@@ -108,27 +85,6 @@ struct fc_rport *fc_disc_lookup_rport(co + } + + /** +- * fc_disc_alloc - Allocate a discovery work object +- * @lport: The FC lport associated with the discovery job +- */ +-static inline struct fc_disc *fc_disc_alloc(struct fc_lport *lport) +-{ +- struct fc_disc *disc; +- +- disc = kzalloc(sizeof(struct fc_disc), GFP_KERNEL); +- INIT_DELAYED_WORK(&disc->disc_work, 
fc_disc_timeout); +- mutex_init(&disc->disc_mutex); +- INIT_LIST_HEAD(&disc->rports); +- +- disc->lport = lport; +- lport->disc = disc; +- disc->delay = FC_DISC_DELAY; +- disc->event = DISC_EV_NONE; +- +- return disc; +-} +- +-/** + * fc_disc_stop_rports - delete all the remote ports associated with the lport + * @disc: The discovery job to stop rports on + * +@@ -167,7 +123,7 @@ static void fc_disc_rport_callback(struc + enum fc_rport_event event) + { + struct fc_rport_libfc_priv *rdata = rport->dd_data; +- struct fc_disc *disc = lport->disc; ++ struct fc_disc *disc = &lport->disc; + int found = 0; + + FC_DEBUG_DISC("Received a %d event for port (%6x)\n", event, +@@ -304,13 +260,7 @@ static void fc_disc_recv_req(struct fc_s + struct fc_lport *lport) + { + u8 op; +- struct fc_disc *disc = lport->disc; +- +- if (!disc) { +- FC_DBG("Received a request for an lport not managed " +- "by the discovery engine\n"); +- return; +- } ++ struct fc_disc *disc = &lport->disc; + + op = fc_frame_payload_op(fp); + switch (op) { +@@ -365,17 +315,7 @@ static void fc_disc_start(void (*disc_ca + { + struct fc_rport *rport; + struct fc_rport_identifiers ids; +- struct fc_disc *disc = lport->disc; +- +- if (!disc) { +- FC_DEBUG_DISC("No existing discovery job, " +- "creating one for lport (%6x)\n", +- fc_host_port_id(lport->host)); +- disc = fc_disc_alloc(lport); +- } else +- FC_DEBUG_DISC("Found an existing discovery job " +- "for lport (%6x)\n", +- fc_host_port_id(lport->host)); ++ struct fc_disc *disc = &lport->disc; + + /* + * At this point we may have a new disc job or an existing +@@ -831,7 +771,7 @@ out: + */ + void fc_disc_stop(struct fc_lport *lport) + { +- struct fc_disc *disc = lport->disc; ++ struct fc_disc *disc = &lport->disc; + + if (disc) { + cancel_delayed_work_sync(&disc->disc_work); +@@ -858,6 +798,7 @@ void fc_disc_stop_final(struct fc_lport + */ + int fc_disc_init(struct fc_lport *lport) + { ++ struct fc_disc *disc; + + if (!lport->tt.disc_start) + 
lport->tt.disc_start = fc_disc_start; +@@ -874,6 +815,15 @@ int fc_disc_init(struct fc_lport *lport) + if (!lport->tt.rport_lookup) + lport->tt.rport_lookup = fc_disc_lookup_rport; + ++ disc = &lport->disc; ++ INIT_DELAYED_WORK(&disc->disc_work, fc_disc_timeout); ++ mutex_init(&disc->disc_mutex); ++ INIT_LIST_HEAD(&disc->rports); ++ ++ disc->lport = lport; ++ disc->delay = FC_DISC_DELAY; ++ disc->event = DISC_EV_NONE; ++ + return 0; + } + EXPORT_SYMBOL(fc_disc_init); +--- a/drivers/scsi/libfc/fc_lport.c ++++ b/drivers/scsi/libfc/fc_lport.c +@@ -627,8 +627,6 @@ int fc_fabric_logoff(struct fc_lport *lp + { + lport->tt.disc_stop_final(lport); + mutex_lock(&lport->lp_mutex); +- kfree(lport->disc); +- lport->disc = NULL; + fc_lport_enter_logo(lport); + mutex_unlock(&lport->lp_mutex); + return 0; +--- a/include/scsi/libfc.h ++++ b/include/scsi/libfc.h +@@ -572,7 +572,25 @@ struct libfc_function_template { + void (*disc_stop_final) (struct fc_lport *); + }; + +-struct fc_disc; ++/* information used by the discovery layer */ ++struct fc_disc { ++ unsigned char retry_count; ++ unsigned char delay; ++ unsigned char pending; ++ unsigned char requested; ++ unsigned short seq_count; ++ unsigned char buf_len; ++ enum fc_disc_event event; ++ ++ void (*disc_callback)(struct fc_lport *, ++ enum fc_disc_event); ++ ++ struct list_head rports; ++ struct fc_lport *lport; ++ struct mutex disc_mutex; ++ struct fc_gpn_ft_resp partial_buf; /* partial name buffer */ ++ struct delayed_work disc_work; ++}; + + struct fc_lport { + struct list_head list; +@@ -582,8 +600,8 @@ struct fc_lport { + struct fc_exch_mgr *emp; + struct fc_rport *dns_rp; + struct fc_rport *ptp_rp; +- struct fc_disc *disc; + void *scsi_priv; ++ struct fc_disc disc; + + /* Operational Information */ + struct libfc_function_template tt; diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-make-rscn-parsing-more-r.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-make-rscn-parsing-more-r.diff new file mode 
100644 index 0000000000..c73f4e08f1 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-make-rscn-parsing-more-r.diff @@ -0,0 +1,59 @@ +From: Chris Leech +Subject: [FcOE] make RSCN parsing more robust +References: bnc #459142 + +RSCN parsing needs to verify that the payload length specified in the RSCN ELS +message does not exceed the size of the actual frame received. + +Signed-off-by: Chris Leech +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_disc.c | 19 +++++++++++++++---- + 1 files changed, 15 insertions(+), 4 deletions(-) + + +diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c +index 0416041..8b609e4 100644 +--- a/drivers/scsi/libfc/fc_disc.c ++++ b/drivers/scsi/libfc/fc_disc.c +@@ -173,17 +173,27 @@ static void fc_disc_recv_rscn_req(struct fc_seq *sp, struct fc_frame *fp, + FC_DEBUG_DISC("Received an RSCN event on port (%6x)\n", + fc_host_port_id(lport->host)); + ++ /* make sure the frame contains an RSCN message */ + rp = fc_frame_payload_get(fp, sizeof(*rp)); +- +- if (!rp || rp->rscn_page_len != sizeof(*pp)) ++ if (!rp) + goto reject; +- ++ /* make sure the page length is as expected (4 bytes) */ ++ if (rp->rscn_page_len != sizeof(*pp)) ++ goto reject; ++ /* get the RSCN payload length */ + len = ntohs(rp->rscn_plen); + if (len < sizeof(*rp)) + goto reject; ++ /* make sure the frame contains the expected payload */ ++ rp = fc_frame_payload_get(fp, len); ++ if (!rp) ++ goto reject; ++ /* payload must be a multiple of the RSCN page size */ + len -= sizeof(*rp); ++ if (len % sizeof(*pp)) ++ goto reject; + +- for (pp = (void *)(rp + 1); len; len -= sizeof(*pp), pp++) { ++ for (pp = (void *)(rp + 1); len > 0; len -= sizeof(*pp), pp++) { + ev_qual = pp->rscn_page_flags >> ELS_RSCN_EV_QUAL_BIT; + ev_qual &= ELS_RSCN_EV_QUAL_MASK; + fmt = pp->rscn_page_flags >> ELS_RSCN_ADDR_FMT_BIT; +@@ -239,6 +249,7 @@ static void fc_disc_recv_rscn_req(struct fc_seq *sp, struct fc_frame *fp, + fc_frame_free(fp); + return; + 
reject: ++ FC_DEBUG_DISC("Received a bad RSCN frame\n"); + rjt_data.fp = NULL; + rjt_data.reason = ELS_RJT_LOGIC; + rjt_data.explan = ELS_EXPL_NONE; diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-make-sure-we-access-the.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-make-sure-we-access-the.diff new file mode 100644 index 0000000000..a1bba0c9eb --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-make-sure-we-access-the.diff @@ -0,0 +1,114 @@ +From: Chris Leech +Subject: [FcOE] make sure we access the CRC safely +References: bnc #459142 + +Even when fcoe verified that the EOF and CRC trailer bytes were there, when +the CRC check was delayed for solicited SCSI data libfc would look past what +was marked as valid data in the frame to find the CRC in the FCoE trailer. + +Instead, pass the CRC to libfc in the context block. + +Signed-off-by: Chris Leech +Acked-by: Bernhard Walle +--- + + drivers/scsi/fcoe/libfcoe.c | 20 ++++++++++++++------ + drivers/scsi/libfc/fc_fcp.c | 2 +- + drivers/scsi/libfc/fc_frame.c | 2 +- + include/scsi/fc_frame.h | 3 +++ + 4 files changed, 19 insertions(+), 8 deletions(-) + + +--- a/drivers/scsi/fcoe/libfcoe.c ++++ b/drivers/scsi/fcoe/libfcoe.c +@@ -527,7 +527,7 @@ int fcoe_percpu_receive_thread(void *arg + struct fcoe_dev_stats *stats; + struct fc_frame_header *fh; + struct sk_buff *skb; +- struct fcoe_crc_eof *cp; ++ struct fcoe_crc_eof crc_eof; + struct fc_frame *fp; + u8 *mac = NULL; + struct fcoe_softc *fc; +@@ -604,13 +604,21 @@ int fcoe_percpu_receive_thread(void *arg + } + + fp = (struct fc_frame *)skb; +- cp = (struct fcoe_crc_eof *)(skb->data + fr_len); + fc_frame_init(fp); + fr_dev(fp) = lp; + fr_sof(fp) = hp->fcoe_sof; +- fr_eof(fp) = cp->fcoe_eof; +- /* trim off the CRC and EOF trailer*/ +- skb_trim(skb, fr_len); ++ ++ /* Copy out the CRC and EOF trailer for access */ ++ if (skb_copy_bits(skb, fr_len, &crc_eof, sizeof(crc_eof))) { ++ kfree_skb(skb); ++ continue; ++ } ++ fr_eof(fp) = 
crc_eof.fcoe_eof; ++ fr_crc(fp) = crc_eof.fcoe_crc32; ++ if (pskb_trim(skb, fr_len)) { ++ kfree_skb(skb); ++ continue; ++ } + + /* + * We only check CRC if no offload is available and if it is +@@ -629,7 +637,7 @@ int fcoe_percpu_receive_thread(void *arg + continue; + } + if (fr_flags(fp) & FCPHF_CRC_UNCHECKED) { +- if (le32_to_cpu(cp->fcoe_crc32) != ++ if (le32_to_cpu(fr_crc(fp)) != + ~crc32(~0, skb->data, fr_len)) { + if (debug_fcoe || stats->InvalidCRCCount < 5) + printk(KERN_WARNING "fcoe: dropping " +--- a/drivers/scsi/libfc/fc_fcp.c ++++ b/drivers/scsi/libfc/fc_fcp.c +@@ -356,7 +356,7 @@ static void fc_fcp_recv_data(struct fc_f + len += 4 - (len % 4); + } + +- if (~crc != le32_to_cpu(*(__le32 *)(buf + len))) { ++ if (~crc != le32_to_cpu(fr_crc(fp))) { + crc_err: + stats = lp->dev_stats[smp_processor_id()]; + stats->ErrorFrames++; +--- a/drivers/scsi/libfc/fc_frame.c ++++ b/drivers/scsi/libfc/fc_frame.c +@@ -42,7 +42,7 @@ u32 fc_frame_crc_check(struct fc_frame * + len = (fr_len(fp) + 3) & ~3; /* round up length to include fill */ + bp = (const u8 *) fr_hdr(fp); + crc = ~crc32(~0, bp, len); +- error = crc ^ *(u32 *) (bp + len); ++ error = crc ^ fr_crc(fp); + return error; + } + EXPORT_SYMBOL(fc_frame_crc_check); +--- a/include/scsi/fc_frame.h ++++ b/include/scsi/fc_frame.h +@@ -56,6 +56,7 @@ + #define fr_max_payload(fp) (fr_cb(fp)->fr_max_payload) + #define fr_cmd(fp) (fr_cb(fp)->fr_cmd) + #define fr_dir(fp) (fr_cmd(fp)->sc_data_direction) ++#define fr_crc(fp) (fr_cb(fp)->fr_crc) + + struct fc_frame { + struct sk_buff skb; +@@ -66,12 +67,14 @@ struct fcoe_rcv_info { + struct fc_lport *fr_dev; /* transport layer private pointer */ + struct fc_seq *fr_seq; /* for use with exchange manager */ + struct scsi_cmnd *fr_cmd; /* for use of scsi command */ ++ u32 fr_crc; + u16 fr_max_payload; /* max FC payload */ + enum fc_sof fr_sof; /* start of frame delimiter */ + enum fc_eof fr_eof; /* end of frame delimiter */ + u8 fr_flags; /* flags - see below */ + }; + ++ + /* + 
* Get fc_frame pointer for an skb that's already been imported. + */ diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-pass-lport-in-exch_mgr_r.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-pass-lport-in-exch_mgr_r.diff new file mode 100644 index 0000000000..dba8518b84 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-pass-lport-in-exch_mgr_r.diff @@ -0,0 +1,90 @@ +From: Abhijeet Joglekar +Subject: libfc: Pass lport in exch_mgr_reset +References: bnc #465596 + +fc_exch_mgr structure is private to fc_exch.c. To export exch_mgr_reset to +transport, transport needs access to the exch manager. Change +exch_mgr_reset to use lport param which is the shared structure between +libFC and transport. + +Alternatively, fc_exch_mgr definition can be moved to libfc.h so that lport +can be accessed from mp*. + +Signed-off-by: Abhijeet Joglekar +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_exch.c | 3 ++- + drivers/scsi/libfc/fc_lport.c | 4 ++-- + drivers/scsi/libfc/fc_rport.c | 4 ++-- + include/scsi/libfc.h | 4 ++-- + 4 files changed, 8 insertions(+), 7 deletions(-) + + +--- a/drivers/scsi/libfc/fc_exch.c ++++ b/drivers/scsi/libfc/fc_exch.c +@@ -1478,10 +1478,11 @@ static void fc_exch_reset(struct fc_exch + * If sid is non-zero, reset only exchanges we source from that FID. + * If did is non-zero, reset only exchanges destined to that FID. 
+ */ +-void fc_exch_mgr_reset(struct fc_exch_mgr *mp, u32 sid, u32 did) ++void fc_exch_mgr_reset(struct fc_lport *lp, u32 sid, u32 did) + { + struct fc_exch *ep; + struct fc_exch *next; ++ struct fc_exch_mgr *mp = lp->emp; + + spin_lock_bh(&mp->em_lock); + restart: +--- a/drivers/scsi/libfc/fc_lport.c ++++ b/drivers/scsi/libfc/fc_lport.c +@@ -640,7 +640,7 @@ int fc_lport_destroy(struct fc_lport *lp + { + lport->tt.frame_send = fc_frame_drop; + lport->tt.fcp_abort_io(lport); +- lport->tt.exch_mgr_reset(lport->emp, 0, 0); ++ lport->tt.exch_mgr_reset(lport, 0, 0); + return 0; + } + EXPORT_SYMBOL(fc_lport_destroy); +@@ -951,7 +951,7 @@ static void fc_lport_enter_reset(struct + + lport->tt.disc_stop(lport); + +- lport->tt.exch_mgr_reset(lport->emp, 0, 0); ++ lport->tt.exch_mgr_reset(lport, 0, 0); + fc_host_fabric_name(lport->host) = 0; + fc_host_port_id(lport->host) = 0; + +--- a/drivers/scsi/libfc/fc_rport.c ++++ b/drivers/scsi/libfc/fc_rport.c +@@ -1302,7 +1302,7 @@ void fc_rport_terminate_io(struct fc_rpo + struct fc_rport_libfc_priv *rdata = rport->dd_data; + struct fc_lport *lport = rdata->local_port; + +- lport->tt.exch_mgr_reset(lport->emp, 0, rport->port_id); +- lport->tt.exch_mgr_reset(lport->emp, rport->port_id, 0); ++ lport->tt.exch_mgr_reset(lport, 0, rport->port_id); ++ lport->tt.exch_mgr_reset(lport, rport->port_id, 0); + } + EXPORT_SYMBOL(fc_rport_terminate_io); +--- a/include/scsi/libfc.h ++++ b/include/scsi/libfc.h +@@ -469,7 +469,7 @@ struct libfc_function_template { + * If s_id is non-zero, reset only exchanges originating from that FID. + * If d_id is non-zero, reset only exchanges sending to that FID. + */ +- void (*exch_mgr_reset)(struct fc_exch_mgr *, ++ void (*exch_mgr_reset)(struct fc_lport *, + u32 s_id, u32 d_id); + + void (*rport_flush_queue)(void); +@@ -908,7 +908,7 @@ struct fc_seq *fc_seq_start_next(struct + * If s_id is non-zero, reset only exchanges originating from that FID. 
+ * If d_id is non-zero, reset only exchanges sending to that FID. + */ +-void fc_exch_mgr_reset(struct fc_exch_mgr *, u32 s_id, u32 d_id); ++void fc_exch_mgr_reset(struct fc_lport *, u32 s_id, u32 d_id); + + /* + * Functions for fc_functions_template diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-set-the-release-function.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-set-the-release-function.diff new file mode 100644 index 0000000000..ea8cb0d588 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-set-the-release-function.diff @@ -0,0 +1,91 @@ +From: Robert Love +Subject: [FcOE] Set the release function for the rport's kobject (round 2) +References: bnc #459142 + +We need to be better about reference counting. The first +step is to make use of the release function that is called +when the reference count drops to 0. + +There was some inital push back by Joe on this patch. We +talked off-list and agreed that the benefit of not having +to check whether a rport is rogue or real overweighed the +fact that we might be using reference counting on objects +(rogue) that cannot be acted on by another thread. + +There is likely room for improvement here, but this should +be a stable start. 
+ +Signed-off-by: Robert Love +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_rport.c | 18 ++++++++++-------- + include/scsi/libfc.h | 1 - + 2 files changed, 10 insertions(+), 9 deletions(-) + + +--- a/drivers/scsi/libfc/fc_rport.c ++++ b/drivers/scsi/libfc/fc_rport.c +@@ -93,6 +93,13 @@ static const char *fc_rport_state_names[ + [RPORT_ST_LOGO] = "LOGO", + }; + ++static void fc_rport_rogue_destroy(struct device *dev) ++{ ++ struct fc_rport *rport = dev_to_rport(dev); ++ FC_DEBUG_RPORT("Destroying rogue rport (%6x)\n", rport->port_id); ++ kfree(rport); ++} ++ + struct fc_rport *fc_rport_rogue_create(struct fc_disc_port *dp) + { + struct fc_rport *rport; +@@ -115,7 +122,7 @@ struct fc_rport *fc_rport_rogue_create(s + * upstream so it fine that this is really ugly and hacky right now. + */ + device_initialize(&rport->dev); +- rport->dev.release = fc_rport_rogue_destroy; // XXX: bwalle ++ rport->dev.release = fc_rport_rogue_destroy; + + mutex_init(&rdata->rp_mutex); + rdata->local_port = dp->lp; +@@ -137,11 +144,6 @@ struct fc_rport *fc_rport_rogue_create(s + return rport; + } + +-void fc_rport_rogue_destroy(struct fc_rport *rport) +-{ +- kfree(rport); +-} +- + /** + * fc_rport_state - return a string for the state the rport is in + * @rport: The rport whose state we want to get a string for +@@ -263,7 +265,7 @@ static void fc_rport_work(struct work_st + "(%6x).\n", ids.port_id); + event = RPORT_EV_FAILED; + } +- fc_rport_rogue_destroy(rport); ++ put_device(&rport->dev); + rport = new_rport; + rdata = new_rport->dd_data; + if (rport_ops->event_callback) +@@ -276,7 +278,7 @@ static void fc_rport_work(struct work_st + if (rport_ops->event_callback) + rport_ops->event_callback(lport, rport, event); + if (trans_state == FC_PORTSTATE_ROGUE) +- fc_rport_rogue_destroy(rport); ++ put_device(&rport->dev); + else + fc_remote_port_delete(rport); + } else +--- a/include/scsi/libfc.h ++++ b/include/scsi/libfc.h +@@ -169,7 +169,6 @@ struct fc_rport_libfc_priv { + (struct 
fc_rport_libfc_priv *)((void *)x + sizeof(struct fc_rport)); + + struct fc_rport *fc_rport_rogue_create(struct fc_disc_port *); +-void fc_rport_rogue_destroy(struct fc_rport *); + + static inline void fc_rport_set_name(struct fc_rport *rport, u64 wwpn, u64 wwnn) + { diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-updated-comment-for-orde.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-updated-comment-for-orde.diff new file mode 100644 index 0000000000..02e8d8acbe --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-updated-comment-for-orde.diff @@ -0,0 +1,56 @@ +From: Vasu Dev +Subject: [FcOE] updated comment for order of em and ex locks +References: bnc #459142 + +The fc_exch is public but em_lock is static to fc_exch.c, +so updated comment only in fc_exch.c on order of these locks. + +Also removed seq.f_ctl from comments since this field is +already removed. + +Signed-off-by: Vasu Dev +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_exch.c | 5 ++++- + include/scsi/libfc.h | 5 ++--- + 2 files changed, 6 insertions(+), 4 deletions(-) + + +--- a/drivers/scsi/libfc/fc_exch.c ++++ b/drivers/scsi/libfc/fc_exch.c +@@ -68,7 +68,8 @@ static struct kmem_cache *fc_em_cachep; + */ + struct fc_exch_mgr { + enum fc_class class; /* default class for sequences */ +- spinlock_t em_lock; /* exchange manager lock */ ++ spinlock_t em_lock; /* exchange manager lock, ++ must be taken before ex_lock */ + u16 last_xid; /* last allocated exchange ID */ + u16 min_xid; /* min exchange ID */ + u16 max_xid; /* max exchange ID */ +@@ -179,6 +180,8 @@ static struct fc_seq *fc_seq_start_next_ + * sequence allocation and deallocation must be locked. + * - exchange refcnt can be done atomicly without locks. + * - sequence allocation must be locked by exch lock. ++ * - If the em_lock and ex_lock must be taken at the same time, then the ++ * em_lock must be taken before the ex_lock. 
+ */ + + /* +--- a/include/scsi/libfc.h ++++ b/include/scsi/libfc.h +@@ -299,11 +299,10 @@ struct fc_seq { + /* + * Exchange. + * +- * Locking notes: The ex_lock protects changes to the following fields: +- * esb_stat, f_ctl, seq.ssb_stat, seq.f_ctl. ++ * Locking notes: The ex_lock protects following items: ++ * state, esb_stat, f_ctl, seq.ssb_stat + * seq_id + * sequence allocation +- * + */ + struct fc_exch { + struct fc_exch_mgr *em; /* exchange manager */ diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-updated-libfc-fcoe-modul.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-updated-libfc-fcoe-modul.diff new file mode 100644 index 0000000000..a93906e7cb --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-updated-libfc-fcoe-modul.diff @@ -0,0 +1,35 @@ +From: Vasu Dev +Subject: [FcOE] updated libfc fcoe module ver to 1.0.6 +References: bnc #459142 + +Signed-off-by: Vasu Dev +Acked-by: Bernhard Walle +--- + + drivers/scsi/fcoe/libfcoe.c | 2 +- + drivers/scsi/libfc/fc_fcp.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + + +--- a/drivers/scsi/fcoe/libfcoe.c ++++ b/drivers/scsi/fcoe/libfcoe.c +@@ -59,7 +59,7 @@ static int debug_fcoe; + MODULE_AUTHOR("Open-FCoE.org"); + MODULE_DESCRIPTION("FCoE"); + MODULE_LICENSE("GPL"); +-MODULE_VERSION("1.0.5"); ++MODULE_VERSION("1.0.6"); + + /* fcoe host list */ + LIST_HEAD(fcoe_hostlist); +--- a/drivers/scsi/libfc/fc_fcp.c ++++ b/drivers/scsi/libfc/fc_fcp.c +@@ -42,7 +42,7 @@ + MODULE_AUTHOR("Open-FCoE.org"); + MODULE_DESCRIPTION("libfc"); + MODULE_LICENSE("GPL"); +-MODULE_VERSION("1.0.5"); ++MODULE_VERSION("1.0.6"); + + static int fc_fcp_debug; + diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-use-an-operations-struct.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-use-an-operations-struct.diff new file mode 100644 index 0000000000..ec7f6a2662 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc-use-an-operations-struct.diff @@ -0,0 +1,275 
@@ +From: Robert Love +Subject: use an operations structure for rport callbacks +References: bnc #459142 + +This was called out for the disc callbacks in review +comments when submitting to linux-scsi. It needed to be +fixed for the rport callbacks too. + +This patch also fixes some spacing in the fc_rport +structure definition as well as renaming the fc_lport_rport_event() +function to fc_lport_rport_callback() to more clearly +identify what it's doing. + +Signed-off-by: Robert Love +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_disc.c | 18 +++++++++----- + drivers/scsi/libfc/fc_lport.c | 16 ++++++++----- + drivers/scsi/libfc/fc_rport.c | 19 +++++++-------- + include/scsi/libfc.h | 51 ++++++++++++++++++++++-------------------- + 4 files changed, 57 insertions(+), 47 deletions(-) + + +--- a/drivers/scsi/libfc/fc_disc.c ++++ b/drivers/scsi/libfc/fc_disc.c +@@ -154,7 +154,7 @@ void fc_disc_stop_rports(struct fc_disc + } + + /** +- * fc_disc_rport_event - Event handler for rport events ++ * fc_disc_rport_callback - Event handler for rport events + * @lport: The lport which is receiving the event + * @rport: The rport which the event has occured on + * @event: The event that occured +@@ -162,9 +162,9 @@ void fc_disc_stop_rports(struct fc_disc + * Locking Note: The rport lock should not be held when calling + * this function. 
+ */ +-static void fc_disc_rport_event(struct fc_lport *lport, +- struct fc_rport *rport, +- enum fc_lport_event event) ++static void fc_disc_rport_callback(struct fc_lport *lport, ++ struct fc_rport *rport, ++ enum fc_rport_event event) + { + struct fc_rport_libfc_priv *rdata = rport->dd_data; + struct fc_disc *disc = lport->disc; +@@ -420,6 +420,10 @@ static void fc_disc_start(void (*disc_ca + mutex_unlock(&disc->disc_mutex); + } + ++static struct fc_rport_operations fc_disc_rport_ops = { ++ .event_callback = fc_disc_rport_callback, ++}; ++ + /** + * fc_disc_new_target - Handle new target found by discovery + * @lport: FC local port +@@ -475,7 +479,7 @@ static int fc_disc_new_target(struct fc_ + } + if (rport) { + rp = rport->dd_data; +- rp->event_callback = fc_disc_rport_event; ++ rp->ops = &fc_disc_rport_ops; + rp->rp_state = RPORT_ST_INIT; + lport->tt.rport_login(rport); + } +@@ -658,7 +662,7 @@ static int fc_disc_gpn_ft_parse(struct f + rport = fc_rport_rogue_create(&dp); + if (rport) { + rdata = rport->dd_data; +- rdata->event_callback = fc_disc_rport_event; ++ rdata->ops = &fc_disc_rport_ops; + rdata->local_port = lport; + lport->tt.rport_login(rport); + } else +@@ -812,7 +816,7 @@ static void fc_disc_single(struct fc_dis + new_rport = fc_rport_rogue_create(dp); + if (new_rport) { + rdata = new_rport->dd_data; +- rdata->event_callback = fc_disc_rport_event; ++ rdata->ops = &fc_disc_rport_ops; + kfree(dp); + lport->tt.rport_login(new_rport); + } +--- a/drivers/scsi/libfc/fc_lport.c ++++ b/drivers/scsi/libfc/fc_lport.c +@@ -38,7 +38,7 @@ + * hold the rport's mutex, but not the other way around. + * + * The only complication to this rule is the callbacks from the rport to +- * the lport's event_callback function. When rports become READY they make ++ * the lport's rport_callback function. When rports become READY they make + * a callback to the lport so that it can track them. 
In the case of the + * directory server that callback might cause the lport to change its + * state, implying that the lport mutex would need to be held. This problem +@@ -125,7 +125,7 @@ static int fc_frame_drop(struct fc_lport + } + + /** +- * fc_lport_rport_event - Event handler for rport events ++ * fc_lport_rport_callback - Event handler for rport events + * @lport: The lport which is receiving the event + * @rport: The rport which the event has occured on + * @event: The event that occured +@@ -133,9 +133,9 @@ static int fc_frame_drop(struct fc_lport + * Locking Note: The rport lock should not be held when calling + * this function. + */ +-static void fc_lport_rport_event(struct fc_lport *lport, +- struct fc_rport *rport, +- enum fc_lport_event event) ++static void fc_lport_rport_callback(struct fc_lport *lport, ++ struct fc_rport *rport, ++ enum fc_rport_event event) + { + FC_DEBUG_LPORT("Received a %d event for port (%6x)\n", event, + rport->port_id); +@@ -1265,6 +1265,10 @@ static void fc_lport_enter_rpn_id(struct + fc_lport_error(lport, fp); + } + ++static struct fc_rport_operations fc_lport_rport_ops = { ++ .event_callback = fc_lport_rport_callback, ++}; ++ + /** + * fc_rport_enter_dns - Create a rport to the name server + * @lport: Fibre Channel local port requesting a rport for the name server +@@ -1294,7 +1298,7 @@ static void fc_lport_enter_dns(struct fc + goto err; + + rdata = rport->dd_data; +- rdata->event_callback = fc_lport_rport_event; ++ rdata->ops = &fc_lport_rport_ops; + lport->tt.rport_login(rport); + return; + +--- a/drivers/scsi/libfc/fc_rport.c ++++ b/drivers/scsi/libfc/fc_rport.c +@@ -125,7 +125,7 @@ struct fc_rport *fc_rport_rogue_create(s + rdata->rp_state = RPORT_ST_INIT; + rdata->event = RPORT_EV_NONE; + rdata->flags = FC_RP_FLAGS_REC_SUPPORTED; +- rdata->event_callback = NULL; ++ rdata->ops = NULL; + rdata->e_d_tov = dp->lp->e_d_tov; + rdata->r_a_tov = dp->lp->r_a_tov; + INIT_DELAYED_WORK(&rdata->retry_work, fc_rport_timeout); +@@ 
-216,16 +216,15 @@ static void fc_rport_work(struct work_st + { + struct fc_rport_libfc_priv *rdata = + container_of(work, struct fc_rport_libfc_priv, event_work); +- enum fc_lport_event event; ++ enum fc_rport_event event; + enum fc_rport_trans_state trans_state; + struct fc_lport *lport = rdata->local_port; +- void (*event_callback)(struct fc_lport *, struct fc_rport *, +- enum fc_lport_event); ++ struct fc_rport_operations *rport_ops; + struct fc_rport *rport = PRIV_TO_RPORT(rdata); + + mutex_lock(&rdata->rp_mutex); + event = rdata->event; +- event_callback = rdata->event_callback; ++ rport_ops = rdata->ops; + + if (event == RPORT_EV_CREATED) { + struct fc_rport *new_rport; +@@ -250,7 +249,7 @@ static void fc_rport_work(struct work_st + new_rdata = new_rport->dd_data; + new_rdata->e_d_tov = rdata->e_d_tov; + new_rdata->r_a_tov = rdata->r_a_tov; +- new_rdata->event_callback = rdata->event_callback; ++ new_rdata->ops = rdata->ops; + new_rdata->local_port = rdata->local_port; + new_rdata->flags = FC_RP_FLAGS_REC_SUPPORTED; + new_rdata->trans_state = FC_PORTSTATE_REAL; +@@ -269,15 +268,15 @@ static void fc_rport_work(struct work_st + fc_rport_rogue_destroy(rport); + rport = new_rport; + rdata = new_rport->dd_data; +- if (event_callback) +- event_callback(lport, rport, event); ++ if (rport_ops->event_callback) ++ rport_ops->event_callback(lport, rport, event); + } else if ((event == RPORT_EV_FAILED) || + (event == RPORT_EV_LOGO) || + (event == RPORT_EV_STOP)) { + trans_state = rdata->trans_state; + mutex_unlock(&rdata->rp_mutex); +- if (event_callback) +- event_callback(lport, rport, event); ++ if (rport_ops->event_callback) ++ rport_ops->event_callback(lport, rport, event); + if (trans_state == FC_PORTSTATE_ROGUE) + fc_rport_rogue_destroy(rport); + else +--- a/include/scsi/libfc.h ++++ b/include/scsi/libfc.h +@@ -89,14 +89,6 @@ enum fc_disc_event { + DISC_EV_FAILED + }; + +-enum fc_lport_event { +- RPORT_EV_NONE = 0, +- RPORT_EV_CREATED, +- RPORT_EV_FAILED, +- 
RPORT_EV_STOP, +- RPORT_EV_LOGO +-}; +- + enum fc_rport_state { + RPORT_ST_NONE = 0, + RPORT_ST_INIT, /* initialized */ +@@ -126,6 +118,19 @@ struct fc_disc_port { + struct work_struct rport_work; + }; + ++enum fc_rport_event { ++ RPORT_EV_NONE = 0, ++ RPORT_EV_CREATED, ++ RPORT_EV_FAILED, ++ RPORT_EV_STOP, ++ RPORT_EV_LOGO ++}; ++ ++struct fc_rport_operations { ++ void (*event_callback)(struct fc_lport *, struct fc_rport *, ++ enum fc_rport_event); ++}; ++ + /** + * struct fc_rport_libfc_priv - libfc internal information about a remote port + * @local_port: Fibre Channel host port instance +@@ -140,24 +145,22 @@ struct fc_disc_port { + * @event_callback: Callback for rport READY, FAILED or LOGO + */ + struct fc_rport_libfc_priv { +- struct fc_lport *local_port; +- enum fc_rport_state rp_state; +- u16 flags; ++ struct fc_lport *local_port; ++ enum fc_rport_state rp_state; ++ u16 flags; + #define FC_RP_FLAGS_REC_SUPPORTED (1 << 0) + #define FC_RP_FLAGS_RETRY (1 << 1) +- u16 max_seq; +- unsigned int retries; +- unsigned int e_d_tov; +- unsigned int r_a_tov; +- enum fc_rport_trans_state trans_state; +- struct mutex rp_mutex; +- struct delayed_work retry_work; +- enum fc_lport_event event; +- void (*event_callback)(struct fc_lport *, +- struct fc_rport *, +- enum fc_lport_event); +- struct list_head peers; +- struct work_struct event_work; ++ u16 max_seq; ++ unsigned int retries; ++ unsigned int e_d_tov; ++ unsigned int r_a_tov; ++ enum fc_rport_trans_state trans_state; ++ struct mutex rp_mutex; ++ struct delayed_work retry_work; ++ enum fc_rport_event event; ++ struct fc_rport_operations *ops; ++ struct list_head peers; ++ struct work_struct event_work; + }; + + #define PRIV_TO_RPORT(x) \ diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc-when-rport-goes-away-re.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc-when-rport-goes-away-re.diff new file mode 100644 index 0000000000..06519149c7 --- /dev/null +++ 
b/src/patches/suse-2.6.27.25/patches.drivers/libfc-when-rport-goes-away-re.diff @@ -0,0 +1,41 @@ +From: Abhijeet Joglekar +Subject: libfc: when rport goes away (re-plogi), clean up exchanges to/from rport +References: bnc #465596 + +When a rport goes away, libFC does a plogi which will reset exchanges + at the rport. Clean exchanges at our end, both in transport and libFC. + If transport hooks into exch_mgr_reset, it will call back into + fc_exch_mgr_reset() to clean up libFC exchanges. + +Signed-off-by: Abhijeet Joglekar +Acked-by: Bernhard Walle +--- + + drivers/scsi/libfc/fc_rport.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + + +--- a/drivers/scsi/libfc/fc_rport.c ++++ b/drivers/scsi/libfc/fc_rport.c +@@ -215,6 +215,7 @@ static void fc_rport_state_enter(struct + + static void fc_rport_work(struct work_struct *work) + { ++ u32 port_id; + struct fc_rport_libfc_priv *rdata = + container_of(work, struct fc_rport_libfc_priv, event_work); + enum fc_rport_event event; +@@ -280,8 +281,12 @@ static void fc_rport_work(struct work_st + rport_ops->event_callback(lport, rport, event); + if (trans_state == FC_PORTSTATE_ROGUE) + put_device(&rport->dev); +- else ++ else { ++ port_id = rport->port_id; + fc_remote_port_delete(rport); ++ lport->tt.exch_mgr_reset(lport, 0, port_id); ++ lport->tt.exch_mgr_reset(lport, port_id, 0); ++ } + } else + mutex_unlock(&rdata->rp_mutex); + } diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc_locking.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc_locking.diff new file mode 100644 index 0000000000..2ff9a4efcd --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc_locking.diff @@ -0,0 +1,377 @@ +From: Vasu Dev +Subject: libfc, fcoe: fixed locking issues with lport->lp_mutex around lport->link_status +Patch-mainline: 6d235742e63f6b8912d8b200b75f9aa6d48f3e07 +References: bnc #468053 + +The fcoe_xmit could call fc_pause in case the pending skb queue len is larger +than FCOE_MAX_QUEUE_DEPTH, the 
fc_pause was trying to grab lport->lp_muex to +change lport->link_status and that had these issues :- + +1. The fcoe_xmit was getting called with bh disabled, thus causing +"BUG: scheduling while atomic" when grabbing lport->lp_muex with bh disabled. + +2. fc_linkup and fc_linkdown function calls lport_enter function with +lport->lp_mutex held and these enter function in turn calls fcoe_xmit to send +lport related FC frame, e.g. fc_linkup => fc_lport_enter_flogi to send flogi +req. In this case grabbing the same lport->lp_mutex again in fc_puase from +fcoe_xmit would cause deadlock. + +The lport->lp_mutex was used for setting FC_PAUSE in fcoe_xmit path but +FC_PAUSE bit was not used anywhere beside just setting and clear this +bit in lport->link_status, instead used a separate field qfull in fc_lport +to eliminate need for lport->lp_mutex to track pending queue full condition +and in turn avoid above described two locking issues. + +Also added check for lp->qfull in fc_fcp_lport_queue_ready to trigger +SCSI_MLQUEUE_HOST_BUSY when lp->qfull is set to prevent more scsi-ml cmds +while lp->qfull is set. + +This patch eliminated FC_LINK_UP and FC_PAUSE and instead used dedicated +fields in fc_lport for this, this simplified all related conditional +code. + +Also removed fc_pause and fc_unpause functions and instead used newly added +lport->qfull directly in fcoe. + +Also fixed a circular locking in fc_exch_recv_abts. + +These issues were blocking large file copy to a 2TB lun. 
+ +Signed-off-by: Vasu Dev +Acked-by: Bernhard Walle +--- + drivers/scsi/fcoe/fcoe_sw.c | 6 +++--- + drivers/scsi/fcoe/libfcoe.c | 41 +++++++++++++++++------------------------ + drivers/scsi/libfc/fc_exch.c | 2 +- + drivers/scsi/libfc/fc_fcp.c | 6 +++--- + drivers/scsi/libfc/fc_lport.c | 38 +++++++------------------------------- + drivers/scsi/libfc/fc_rport.c | 2 +- + include/scsi/libfc.h | 12 ++---------- + 7 files changed, 34 insertions(+), 73 deletions(-) + +--- a/drivers/scsi/fcoe/fcoe_sw.c ++++ b/drivers/scsi/fcoe/fcoe_sw.c +@@ -116,7 +116,8 @@ static int fcoe_sw_lport_config(struct f + { + int i = 0; + +- lp->link_status = 0; ++ lp->link_up = 0; ++ lp->qfull = 0; + lp->max_retry_count = 3; + lp->e_d_tov = 2 * 1000; /* FC-FS default */ + lp->r_a_tov = 2 * 2 * 1000; +@@ -181,9 +182,8 @@ static int fcoe_sw_netdev_config(struct + if (fc_set_mfs(lp, mfs)) + return -EINVAL; + +- lp->link_status = ~FC_PAUSE & ~FC_LINK_UP; + if (!fcoe_link_ok(lp)) +- lp->link_status |= FC_LINK_UP; ++ lp->link_up = 1; + + /* offload features support */ + if (fc->real_dev->features & NETIF_F_SG) +--- a/drivers/scsi/fcoe/libfcoe.c ++++ b/drivers/scsi/fcoe/libfcoe.c +@@ -505,7 +505,7 @@ int fcoe_xmit(struct fc_lport *lp, struc + if (rc) { + fcoe_insert_wait_queue(lp, skb); + if (fc->fcoe_pending_queue.qlen > FCOE_MAX_QUEUE_DEPTH) +- fc_pause(lp); ++ lp->qfull = 1; + } + + return 0; +@@ -719,7 +719,7 @@ static void fcoe_recv_flogi(struct fcoe_ + * fcoe_watchdog - fcoe timer callback + * @vp: + * +- * This checks the pending queue length for fcoe and put fcoe to be paused state ++ * This checks the pending queue length for fcoe and set lport qfull + * if the FCOE_MAX_QUEUE_DEPTH is reached. This is done for all fc_lport on the + * fcoe_hostlist. 
+ * +@@ -729,17 +729,17 @@ void fcoe_watchdog(ulong vp) + { + struct fc_lport *lp; + struct fcoe_softc *fc; +- int paused = 0; ++ int qfilled = 0; + + read_lock(&fcoe_hostlist_lock); + list_for_each_entry(fc, &fcoe_hostlist, list) { + lp = fc->lp; + if (lp) { + if (fc->fcoe_pending_queue.qlen > FCOE_MAX_QUEUE_DEPTH) +- paused = 1; ++ qfilled = 1; + if (fcoe_check_wait_queue(lp) < FCOE_MAX_QUEUE_DEPTH) { +- if (paused) +- fc_unpause(lp); ++ if (qfilled) ++ lp->qfull = 0; + } + } + } +@@ -768,8 +768,7 @@ void fcoe_watchdog(ulong vp) + **/ + static int fcoe_check_wait_queue(struct fc_lport *lp) + { +- int rc, unpause = 0; +- int paused = 0; ++ int rc; + struct sk_buff *skb; + struct fcoe_softc *fc; + +@@ -777,10 +776,10 @@ static int fcoe_check_wait_queue(struct + spin_lock_bh(&fc->fcoe_pending_queue.lock); + + /* +- * is this interface paused? ++ * if interface pending queue full then set qfull in lport. + */ + if (fc->fcoe_pending_queue.qlen > FCOE_MAX_QUEUE_DEPTH) +- paused = 1; ++ lp->qfull = 1; + if (fc->fcoe_pending_queue.qlen) { + while ((skb = __skb_dequeue(&fc->fcoe_pending_queue)) != NULL) { + spin_unlock_bh(&fc->fcoe_pending_queue.lock); +@@ -792,11 +791,9 @@ static int fcoe_check_wait_queue(struct + spin_lock_bh(&fc->fcoe_pending_queue.lock); + } + if (fc->fcoe_pending_queue.qlen < FCOE_MAX_QUEUE_DEPTH) +- unpause = 1; ++ lp->qfull = 0; + } + spin_unlock_bh(&fc->fcoe_pending_queue.lock); +- if ((unpause) && (paused)) +- fc_unpause(lp); + return fc->fcoe_pending_queue.qlen; + } + +@@ -874,7 +871,7 @@ static int fcoe_device_notification(stru + struct net_device *real_dev = ptr; + struct fcoe_softc *fc; + struct fcoe_dev_stats *stats; +- u16 new_status; ++ u32 new_link_up; + u32 mfs; + int rc = NOTIFY_OK; + +@@ -891,17 +888,15 @@ static int fcoe_device_notification(stru + goto out; + } + +- new_status = lp->link_status; ++ new_link_up = lp->link_up; + switch (event) { + case NETDEV_DOWN: + case NETDEV_GOING_DOWN: +- new_status &= ~FC_LINK_UP; ++ new_link_up = 
0; + break; + case NETDEV_UP: + case NETDEV_CHANGE: +- new_status &= ~FC_LINK_UP; +- if (!fcoe_link_ok(lp)) +- new_status |= FC_LINK_UP; ++ new_link_up = !fcoe_link_ok(lp); + break; + case NETDEV_CHANGEMTU: + mfs = fc->real_dev->mtu - +@@ -909,17 +904,15 @@ static int fcoe_device_notification(stru + sizeof(struct fcoe_crc_eof)); + if (mfs >= FC_MIN_MAX_FRAME) + fc_set_mfs(lp, mfs); +- new_status &= ~FC_LINK_UP; +- if (!fcoe_link_ok(lp)) +- new_status |= FC_LINK_UP; ++ new_link_up = !fcoe_link_ok(lp); + break; + case NETDEV_REGISTER: + break; + default: + FC_DBG("unknown event %ld call", event); + } +- if (lp->link_status != new_status) { +- if ((new_status & FC_LINK_UP) == FC_LINK_UP) ++ if (lp->link_up != new_link_up) { ++ if (new_link_up) + fc_linkup(lp); + else { + stats = lp->dev_stats[smp_processor_id()]; +--- a/drivers/scsi/libfc/fc_exch.c ++++ b/drivers/scsi/libfc/fc_exch.c +@@ -1096,7 +1096,7 @@ static void fc_exch_recv_abts(struct fc_ + ap->ba_high_seq_cnt = fh->fh_seq_cnt; + ap->ba_low_seq_cnt = htons(sp->cnt); + } +- sp = fc_seq_start_next(sp); ++ sp = fc_seq_start_next_locked(sp); + spin_unlock_bh(&ep->ex_lock); + fc_seq_send_last(sp, fp, FC_RCTL_BA_ACC, FC_TYPE_BLS); + fc_frame_free(rx_fp); +--- a/drivers/scsi/libfc/fc_fcp.c ++++ b/drivers/scsi/libfc/fc_fcp.c +@@ -20,13 +20,13 @@ + */ + + #include ++#include + #include + #include + #include + #include + #include + #include +-#include + + #include + #include +@@ -1622,7 +1622,7 @@ out: + static inline int fc_fcp_lport_queue_ready(struct fc_lport *lp) + { + /* lock ? 
*/ +- return (lp->state == LPORT_ST_READY) && (lp->link_status & FC_LINK_UP); ++ return (lp->state == LPORT_ST_READY) && lp->link_up && !lp->qfull; + } + + /** +@@ -1891,7 +1891,7 @@ int fc_eh_abort(struct scsi_cmnd *sc_cmd + lp = shost_priv(sc_cmd->device->host); + if (lp->state != LPORT_ST_READY) + return rc; +- else if (!(lp->link_status & FC_LINK_UP)) ++ else if (!lp->link_up) + return rc; + + spin_lock_irqsave(lp->host->host_lock, flags); +--- a/drivers/scsi/libfc/fc_lport.c ++++ b/drivers/scsi/libfc/fc_lport.c +@@ -250,7 +250,7 @@ void fc_get_host_port_state(struct Scsi_ + { + struct fc_lport *lp = shost_priv(shost); + +- if ((lp->link_status & FC_LINK_UP) == FC_LINK_UP) ++ if (lp->link_up) + fc_host_port_state(shost) = FC_PORTSTATE_ONLINE; + else + fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE; +@@ -484,7 +484,7 @@ static void fc_lport_recv_rnid_req(struc + * @sp: current sequence in the ADISC exchange + * @fp: ADISC request frame + * +- * Locking Note: The lport lock is exected to be held before calling ++ * Locking Note: The lport lock is expected to be held before calling + * this function. 
+ */ + static void fc_lport_recv_adisc_req(struct fc_seq *sp, struct fc_frame *in_fp, +@@ -577,8 +577,8 @@ void fc_linkup(struct fc_lport *lport) + fc_host_port_id(lport->host)); + + mutex_lock(&lport->lp_mutex); +- if ((lport->link_status & FC_LINK_UP) != FC_LINK_UP) { +- lport->link_status |= FC_LINK_UP; ++ if (!lport->link_up) { ++ lport->link_up = 1; + + if (lport->state == LPORT_ST_RESET) + fc_lport_enter_flogi(lport); +@@ -597,8 +597,8 @@ void fc_linkdown(struct fc_lport *lport) + FC_DEBUG_LPORT("Link is down for port (%6x)\n", + fc_host_port_id(lport->host)); + +- if ((lport->link_status & FC_LINK_UP) == FC_LINK_UP) { +- lport->link_status &= ~(FC_LINK_UP); ++ if (lport->link_up) { ++ lport->link_up = 0; + fc_lport_enter_reset(lport); + lport->tt.fcp_cleanup(lport); + } +@@ -607,30 +607,6 @@ void fc_linkdown(struct fc_lport *lport) + EXPORT_SYMBOL(fc_linkdown); + + /** +- * fc_pause - Pause the flow of frames +- * @lport: The lport to be paused +- */ +-void fc_pause(struct fc_lport *lport) +-{ +- mutex_lock(&lport->lp_mutex); +- lport->link_status |= FC_PAUSE; +- mutex_unlock(&lport->lp_mutex); +-} +-EXPORT_SYMBOL(fc_pause); +- +-/** +- * fc_unpause - Unpause the flow of frames +- * @lport: The lport to be unpaused +- */ +-void fc_unpause(struct fc_lport *lport) +-{ +- mutex_lock(&lport->lp_mutex); +- lport->link_status &= ~(FC_PAUSE); +- mutex_unlock(&lport->lp_mutex); +-} +-EXPORT_SYMBOL(fc_unpause); +- +-/** + * fc_fabric_logoff - Logout of the fabric + * @lport: fc_lport pointer to logoff the fabric + * +@@ -977,7 +953,7 @@ static void fc_lport_enter_reset(struct + fc_host_fabric_name(lport->host) = 0; + fc_host_port_id(lport->host) = 0; + +- if ((lport->link_status & FC_LINK_UP) == FC_LINK_UP) ++ if (lport->link_up) + fc_lport_enter_flogi(lport); + } + +--- a/drivers/scsi/libfc/fc_rport.c ++++ b/drivers/scsi/libfc/fc_rport.c +@@ -425,7 +425,7 @@ static void fc_rport_error(struct fc_rpo + PTR_ERR(fp), fc_rport_state(rport), rdata->retries); + + if (!fp 
|| PTR_ERR(fp) == -FC_EX_TIMEOUT) { +- /* ++ /* + * Memory allocation failure, or the exchange timed out. + * Retry after delay + */ +--- a/include/scsi/libfc.h ++++ b/include/scsi/libfc.h +@@ -68,9 +68,6 @@ + /* + * FC HBA status + */ +-#define FC_PAUSE (1 << 1) +-#define FC_LINK_UP (1 << 0) +- + enum fc_lport_state { + LPORT_ST_NONE = 0, + LPORT_ST_FLOGI, +@@ -603,7 +600,8 @@ struct fc_lport { + + /* Operational Information */ + struct libfc_function_template tt; +- u16 link_status; ++ u8 link_up; ++ u8 qfull; + enum fc_lport_state state; + unsigned long boot_time; + +@@ -704,12 +702,6 @@ void fc_linkup(struct fc_lport *); + void fc_linkdown(struct fc_lport *); + + /* +- * Pause and unpause traffic. +- */ +-void fc_pause(struct fc_lport *); +-void fc_unpause(struct fc_lport *); +- +-/* + * Configure the local port. + */ + int fc_lport_config(struct fc_lport *); diff --git a/src/patches/suse-2.6.27.25/patches.drivers/libfc_rport.diff b/src/patches/suse-2.6.27.25/patches.drivers/libfc_rport.diff new file mode 100644 index 0000000000..92b0599d83 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.drivers/libfc_rport.diff @@ -0,0 +1,265 @@ +From: Chris Leech +Subject: libfc: rport retry on LS_RJT from certain ELS +Patch-mainline: 6147a1194ba86af4266f36c9522a7b0040af98fe +References: bnc #468054 + +This allows any rport ELS to retry on LS_RJT. + +The rport error handling would only retry on resource allocation failures +and exchange timeouts. I have a target that will occasionally reject PLOGI +when we do a quick LOGO/PLOGI. When a critical ELS was rejected, libfc would +fail silently leaving the rport in a dead state. + +The retry count and delay are managed by fc_rport_error_retry. If the retry +count is exceeded fc_rport_error will be called. When retrying is not the +correct course of action, fc_rport_error can be called directly. 
+ +Signed-off-by: Chris Leech +Acked-by: Bernhard Walle +--- + drivers/scsi/libfc/fc_exch.c | 2 + drivers/scsi/libfc/fc_rport.c | 111 ++++++++++++++++++++++++------------------ + include/scsi/fc/fc_fs.h | 5 + + 3 files changed, 69 insertions(+), 49 deletions(-) + +--- a/drivers/scsi/libfc/fc_exch.c ++++ b/drivers/scsi/libfc/fc_exch.c +@@ -32,8 +32,6 @@ + #include + #include + +-#define FC_DEF_R_A_TOV (10 * 1000) /* resource allocation timeout */ +- + /* + * fc_exch_debug can be set in debugger or at compile time to get more logs. + */ +--- a/drivers/scsi/libfc/fc_rport.c ++++ b/drivers/scsi/libfc/fc_rport.c +@@ -81,6 +81,7 @@ static void fc_rport_recv_logo_req(struc + struct fc_seq *, struct fc_frame *); + static void fc_rport_timeout(struct work_struct *); + static void fc_rport_error(struct fc_rport *, struct fc_frame *); ++static void fc_rport_error_retry(struct fc_rport *, struct fc_frame *); + static void fc_rport_work(struct work_struct *); + + static const char *fc_rport_state_names[] = { +@@ -405,55 +406,71 @@ static void fc_rport_timeout(struct work + } + + /** +- * fc_rport_error - Handler for any errors ++ * fc_rport_error - Error handler, called once retries have been exhausted + * @rport: The fc_rport object + * @fp: The frame pointer + * +- * If the error was caused by a resource allocation failure +- * then wait for half a second and retry, otherwise retry +- * immediately. +- * + * Locking Note: The rport lock is expected to be held before + * calling this routine + */ + static void fc_rport_error(struct fc_rport *rport, struct fc_frame *fp) + { + struct fc_rport_libfc_priv *rdata = rport->dd_data; +- unsigned long delay = 0; + + FC_DEBUG_RPORT("Error %ld in state %s, retries %d\n", + PTR_ERR(fp), fc_rport_state(rport), rdata->retries); + +- if (!fp || PTR_ERR(fp) == -FC_EX_TIMEOUT) { +- /* +- * Memory allocation failure, or the exchange timed out. 
+- * Retry after delay +- */ +- if (rdata->retries < rdata->local_port->max_retry_count) { +- rdata->retries++; +- if (!fp) +- delay = msecs_to_jiffies(500); +- get_device(&rport->dev); +- schedule_delayed_work(&rdata->retry_work, delay); +- } else { +- switch (rdata->rp_state) { +- case RPORT_ST_PLOGI: +- case RPORT_ST_PRLI: +- case RPORT_ST_LOGO: +- rdata->event = RPORT_EV_FAILED; +- queue_work(rport_event_queue, +- &rdata->event_work); +- break; +- case RPORT_ST_RTV: +- fc_rport_enter_ready(rport); +- break; +- case RPORT_ST_NONE: +- case RPORT_ST_READY: +- case RPORT_ST_INIT: +- break; +- } +- } ++ switch (rdata->rp_state) { ++ case RPORT_ST_PLOGI: ++ case RPORT_ST_PRLI: ++ case RPORT_ST_LOGO: ++ rdata->event = RPORT_EV_FAILED; ++ queue_work(rport_event_queue, ++ &rdata->event_work); ++ break; ++ case RPORT_ST_RTV: ++ fc_rport_enter_ready(rport); ++ break; ++ case RPORT_ST_NONE: ++ case RPORT_ST_READY: ++ case RPORT_ST_INIT: ++ break; ++ } ++} ++ ++/** ++ * fc_rport_error_retry - Error handler when retries are desired ++ * @rport: The fc_rport object ++ * @fp: The frame pointer ++ * ++ * If the error was an exchange timeout retry immediately, ++ * otherwise wait for E_D_TOV. 
++ * ++ * Locking Note: The rport lock is expected to be held before ++ * calling this routine ++ */ ++static void fc_rport_error_retry(struct fc_rport *rport, struct fc_frame *fp) ++{ ++ struct fc_rport_libfc_priv *rdata = rport->dd_data; ++ unsigned long delay = FC_DEF_E_D_TOV; ++ ++ /* make sure this isn't an FC_EX_CLOSED error, never retry those */ ++ if (PTR_ERR(fp) == -FC_EX_CLOSED) ++ return fc_rport_error(rport, fp); ++ ++ if (rdata->retries < rdata->local_port->max_retry_count) { ++ FC_DEBUG_RPORT("Error %ld in state %s, retrying\n", ++ PTR_ERR(fp), fc_rport_state(rport)); ++ rdata->retries++; ++ /* no additional delay on exchange timeouts */ ++ if (PTR_ERR(fp) == -FC_EX_TIMEOUT) ++ delay = 0; ++ get_device(&rport->dev); ++ schedule_delayed_work(&rdata->retry_work, delay); ++ return; + } ++ ++ return fc_rport_error(rport, fp); + } + + /** +@@ -490,7 +507,7 @@ static void fc_rport_plogi_resp(struct f + } + + if (IS_ERR(fp)) { +- fc_rport_error(rport, fp); ++ fc_rport_error_retry(rport, fp); + goto err; + } + +@@ -522,7 +539,7 @@ static void fc_rport_plogi_resp(struct f + else + fc_rport_enter_prli(rport); + } else +- fc_rport_error(rport, fp); ++ fc_rport_error_retry(rport, fp); + + out: + fc_frame_free(fp); +@@ -552,14 +569,14 @@ static void fc_rport_enter_plogi(struct + rport->maxframe_size = FC_MIN_MAX_PAYLOAD; + fp = fc_frame_alloc(lport, sizeof(struct fc_els_flogi)); + if (!fp) { +- fc_rport_error(rport, fp); ++ fc_rport_error_retry(rport, fp); + return; + } + rdata->e_d_tov = lport->e_d_tov; + + if (!lport->tt.elsct_send(lport, rport, fp, ELS_PLOGI, + fc_rport_plogi_resp, rport, lport->e_d_tov)) +- fc_rport_error(rport, fp); ++ fc_rport_error_retry(rport, fp); + else + get_device(&rport->dev); + } +@@ -599,7 +616,7 @@ static void fc_rport_prli_resp(struct fc + } + + if (IS_ERR(fp)) { +- fc_rport_error(rport, fp); ++ fc_rport_error_retry(rport, fp); + goto err; + } + +@@ -657,7 +674,7 @@ static void fc_rport_logo_resp(struct fc + rport->port_id); + + 
if (IS_ERR(fp)) { +- fc_rport_error(rport, fp); ++ fc_rport_error_retry(rport, fp); + goto err; + } + +@@ -707,13 +724,13 @@ static void fc_rport_enter_prli(struct f + + fp = fc_frame_alloc(lport, sizeof(*pp)); + if (!fp) { +- fc_rport_error(rport, fp); ++ fc_rport_error_retry(rport, fp); + return; + } + + if (!lport->tt.elsct_send(lport, rport, fp, ELS_PRLI, + fc_rport_prli_resp, rport, lport->e_d_tov)) +- fc_rport_error(rport, fp); ++ fc_rport_error_retry(rport, fp); + else + get_device(&rport->dev); + } +@@ -804,13 +821,13 @@ static void fc_rport_enter_rtv(struct fc + + fp = fc_frame_alloc(lport, sizeof(struct fc_els_rtv)); + if (!fp) { +- fc_rport_error(rport, fp); ++ fc_rport_error_retry(rport, fp); + return; + } + + if (!lport->tt.elsct_send(lport, rport, fp, ELS_RTV, + fc_rport_rtv_resp, rport, lport->e_d_tov)) +- fc_rport_error(rport, fp); ++ fc_rport_error_retry(rport, fp); + else + get_device(&rport->dev); + } +@@ -835,13 +852,13 @@ static void fc_rport_enter_logo(struct f + + fp = fc_frame_alloc(lport, sizeof(struct fc_els_logo)); + if (!fp) { +- fc_rport_error(rport, fp); ++ fc_rport_error_retry(rport, fp); + return; + } + + if (!lport->tt.elsct_send(lport, rport, fp, ELS_LOGO, + fc_rport_logo_resp, rport, lport->e_d_tov)) +- fc_rport_error(rport, fp); ++ fc_rport_error_retry(rport, fp); + else + get_device(&rport->dev); + } +--- a/include/scsi/fc/fc_fs.h ++++ b/include/scsi/fc/fc_fs.h +@@ -337,4 +337,9 @@ enum fc_pf_rjt_reason { + FC_RJT_VENDOR = 0xff, /* vendor specific reject */ + }; + ++/* default timeout values */ ++ ++#define FC_DEF_E_D_TOV 2000UL ++#define FC_DEF_R_A_TOV 10000UL ++ + #endif /* _FC_FS_H_ */ diff --git a/src/patches/suse-2.6.27.25/patches.fixes/b43legacy-fix-led_device_naming.diff b/src/patches/suse-2.6.27.25/patches.fixes/b43legacy-fix-led_device_naming.diff new file mode 100644 index 0000000000..b79e5a6ef1 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/b43legacy-fix-led_device_naming.diff @@ -0,0 +1,54 @@ +From: 
Danny Kukawka +Subject: b43legacy: fix led naming + +Fixed led device naming for the b43legacy driver. Due to the +documentation of the led subsystem/class the naming should be +"devicename:colour:function" while not applying sections +should be left blank. + +This should lead to e.g. "b43legacy-%s::rx" instead of +"b43legacy-%s:rx". + +Signed-off-by: Danny Kukawka +-- + leds.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/wireless/b43legacy/leds.c b/drivers/net/wireless/b43legacy/leds.c +index cacb786..cb4511f 100644 +--- a/drivers/net/wireless/b43legacy/leds.c ++++ b/drivers/net/wireless/b43legacy/leds.c +@@ -146,12 +146,12 @@ static void b43legacy_map_led(struct b43legacy_wldev *dev, + case B43legacy_LED_TRANSFER: + case B43legacy_LED_APTRANSFER: + snprintf(name, sizeof(name), +- "b43legacy-%s:tx", wiphy_name(hw->wiphy)); ++ "b43legacy-%s::tx", wiphy_name(hw->wiphy)); + b43legacy_register_led(dev, &dev->led_tx, name, + ieee80211_get_tx_led_name(hw), + led_index, activelow); + snprintf(name, sizeof(name), +- "b43legacy-%s:rx", wiphy_name(hw->wiphy)); ++ "b43legacy-%s::rx", wiphy_name(hw->wiphy)); + b43legacy_register_led(dev, &dev->led_rx, name, + ieee80211_get_rx_led_name(hw), + led_index, activelow); +@@ -161,7 +161,7 @@ static void b43legacy_map_led(struct b43legacy_wldev *dev, + case B43legacy_LED_RADIO_B: + case B43legacy_LED_MODE_BG: + snprintf(name, sizeof(name), +- "b43legacy-%s:radio", wiphy_name(hw->wiphy)); ++ "b43legacy-%s::radio", wiphy_name(hw->wiphy)); + b43legacy_register_led(dev, &dev->led_radio, name, + b43legacy_rfkill_led_name(dev), + led_index, activelow); +@@ -172,7 +172,7 @@ static void b43legacy_map_led(struct b43legacy_wldev *dev, + case B43legacy_LED_WEIRD: + case B43legacy_LED_ASSOC: + snprintf(name, sizeof(name), +- "b43legacy-%s:assoc", wiphy_name(hw->wiphy)); ++ "b43legacy-%s::assoc", wiphy_name(hw->wiphy)); + b43legacy_register_led(dev, &dev->led_assoc, name, + 
ieee80211_get_assoc_led_name(hw), + led_index, activelow); + diff --git a/src/patches/suse-2.6.27.25/patches.fixes/ext2_mtime_update_on_rename.diff b/src/patches/suse-2.6.27.25/patches.fixes/ext2_mtime_update_on_rename.diff new file mode 100644 index 0000000000..15d5a50179 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/ext2_mtime_update_on_rename.diff @@ -0,0 +1,31 @@ +From: Jan Kara +Subject: [PATCH] ext2: Do not update mtime of a move directory when parent has not changed +References: bnc#493392 +Patch-mainline: 2.6.30 + +If the parent of the moved directory has not changed, there's no real +reason to change mtime. Specs doesn't seem to say anything about this +particular case and e.g. ext3 does not change mtime in this case. +So we become a tiny bit more consistent. + +Spotted by ronny.pretzsch@dfs.de, initial fix by Jörn Engel . + +Signed-off-by: Jan Kara +--- + fs/ext2/namei.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/fs/ext2/namei.c ++++ b/fs/ext2/namei.c +@@ -355,7 +355,10 @@ static int ext2_rename (struct inode * o + inode_dec_link_count(old_inode); + + if (dir_de) { +- ext2_set_link(old_inode, dir_de, dir_page, new_dir); ++ /* Set link only if parent has changed and thus avoid setting ++ * of mtime of the moved directory on a pure rename. 
*/ ++ if (old_dir != new_dir) ++ ext2_set_link(old_inode, dir_de, dir_page, new_dir); + inode_dec_link_count(old_dir); + } + return 0; diff --git a/src/patches/suse-2.6.27.25/patches.fixes/ext3_false_EIO_fix.diff b/src/patches/suse-2.6.27.25/patches.fixes/ext3_false_EIO_fix.diff new file mode 100644 index 0000000000..4b8bf1ecfe --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/ext3_false_EIO_fix.diff @@ -0,0 +1,172 @@ +From: Jan Kara +Subject: [PATCH] ext3: Avoid false EIO errors +References: bnc#479730 + +Sometimes block_write_begin() can map buffers in a page but later we fail to +copy data into those buffers (because the source page has been paged out in the +mean time). We then end up with !uptodate mapped buffers. To add a bit more to +the confusion, block_write_end() does not commit any data (and thus does not +any mark buffers as uptodate) if we didn't succeed with copying all the data. + +Commit f4fc66a894546bdc88a775d0e83ad20a65210bcb (ext3: convert to new aops) +missed these cases and thus we were inserting non-uptodate buffers to +transaction's list which confuses JBD code and it reports IO errors, aborts +a transaction and generally makes users afraid about their data ;-P. + +This patch fixes the problem by reorganizing ext3_..._write_end() code to +first call block_write_end() to mark buffers with valid data uptodate and +after that we file only uptodate buffers to transaction's lists. Also +fix a problem where we could leave blocks allocated beyond i_size (i_disksize +in fact). 
+ +Signed-off-by: Jan Kara + +--- + fs/ext3/inode.c | 99 +++++++++++++++++++++++--------------------------------- + 1 file changed, 42 insertions(+), 57 deletions(-) + +--- a/fs/ext3/inode.c ++++ b/fs/ext3/inode.c +@@ -1195,6 +1195,18 @@ int ext3_journal_dirty_data(handle_t *ha + return err; + } + ++/* For ordered writepage and write_end functions */ ++static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) ++{ ++ /* ++ * Write could have mapped the buffer but it didn't copy the data in ++ * yet. So avoid filing such buffer into a transaction. ++ */ ++ if (buffer_mapped(bh) && buffer_uptodate(bh)) ++ return ext3_journal_dirty_data(handle, bh); ++ return 0; ++} ++ + /* For write_end() in data=journal mode */ + static int write_end_fn(handle_t *handle, struct buffer_head *bh) + { +@@ -1205,26 +1217,29 @@ static int write_end_fn(handle_t *handle + } + + /* +- * Generic write_end handler for ordered and writeback ext3 journal modes. +- * We can't use generic_write_end, because that unlocks the page and we need to +- * unlock the page after ext3_journal_stop, but ext3_journal_stop must run +- * after block_write_end. ++ * This is nasty and subtle: ext3_write_begin() could have allocated blocks ++ * for the whole page but later we failed to copy the data in. So the disk ++ * size we really have allocated is pos + len (block_write_end() has zeroed ++ * the freshly allocated buffers so we aren't going to write garbage). But we ++ * want to keep i_size at the place where data copying finished so that we ++ * don't confuse readers. The worst what can happen is that we expose a page ++ * of zeros at the end of file after a crash... 
+ */ +-static int ext3_generic_write_end(struct file *file, +- struct address_space *mapping, +- loff_t pos, unsigned len, unsigned copied, +- struct page *page, void *fsdata) ++static void update_file_sizes(struct inode *inode, loff_t pos, unsigned len, ++ unsigned copied) + { +- struct inode *inode = file->f_mapping->host; ++ int mark_dirty = 0; + +- copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); +- +- if (pos+copied > inode->i_size) { +- i_size_write(inode, pos+copied); +- mark_inode_dirty(inode); ++ if (pos + len > EXT3_I(inode)->i_disksize) { ++ mark_dirty = 1; ++ EXT3_I(inode)->i_disksize = pos + len; + } +- +- return copied; ++ if (pos + copied > inode->i_size) { ++ i_size_write(inode, pos + copied); ++ mark_dirty = 1; ++ } ++ if (mark_dirty) ++ mark_inode_dirty(inode); + } + + /* +@@ -1244,29 +1259,17 @@ static int ext3_ordered_write_end(struct + unsigned from, to; + int ret = 0, ret2; + ++ copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); ++ ++ /* See comment at update_file_sizes() for why we check buffers upto ++ * from + len */ + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; +- + ret = walk_page_buffers(handle, page_buffers(page), +- from, to, NULL, ext3_journal_dirty_data); ++ from, to, NULL, journal_dirty_data_fn); + +- if (ret == 0) { +- /* +- * generic_write_end() will run mark_inode_dirty() if i_size +- * changes. So let's piggyback the i_disksize mark_inode_dirty +- * into that. 
+- */ +- loff_t new_i_size; +- +- new_i_size = pos + copied; +- if (new_i_size > EXT3_I(inode)->i_disksize) +- EXT3_I(inode)->i_disksize = new_i_size; +- ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, +- page, fsdata); +- copied = ret2; +- if (ret2 < 0) +- ret = ret2; +- } ++ if (ret == 0) ++ update_file_sizes(inode, pos, len, copied); + ret2 = ext3_journal_stop(handle); + if (!ret) + ret = ret2; +@@ -1283,22 +1286,11 @@ static int ext3_writeback_write_end(stru + { + handle_t *handle = ext3_journal_current_handle(); + struct inode *inode = file->f_mapping->host; +- int ret = 0, ret2; +- loff_t new_i_size; ++ int ret; + +- new_i_size = pos + copied; +- if (new_i_size > EXT3_I(inode)->i_disksize) +- EXT3_I(inode)->i_disksize = new_i_size; +- +- ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, +- page, fsdata); +- copied = ret2; +- if (ret2 < 0) +- ret = ret2; +- +- ret2 = ext3_journal_stop(handle); +- if (!ret) +- ret = ret2; ++ copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); ++ update_file_sizes(inode, pos, len, copied); ++ ret = ext3_journal_stop(handle); + unlock_page(page); + page_cache_release(page); + +@@ -1412,13 +1404,6 @@ static int bput_one(handle_t *handle, st + return 0; + } + +-static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) +-{ +- if (buffer_mapped(bh)) +- return ext3_journal_dirty_data(handle, bh); +- return 0; +-} +- + /* + * Note that we always start a transaction even if we're not journalling + * data. 
This is to preserve ordering: any hole instantiation within diff --git a/src/patches/suse-2.6.27.25/patches.fixes/ia64-sparse-fixes.diff b/src/patches/suse-2.6.27.25/patches.fixes/ia64-sparse-fixes.diff new file mode 100644 index 0000000000..c923fc074c --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/ia64-sparse-fixes.diff @@ -0,0 +1,54 @@ +From: Jan Blunck +Subject: ia64-kvm: fix sparse warnings + +This patch fixes some sparse warning about dubious one-bit signed bitfield. + +Signed-off-by: Jan Blunck +--- + arch/ia64/kvm/vti.h | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +Index: b/arch/ia64/kvm/vti.h +=================================================================== +--- a/arch/ia64/kvm/vti.h ++++ b/arch/ia64/kvm/vti.h +@@ -83,13 +83,13 @@ + union vac { + unsigned long value; + struct { +- int a_int:1; +- int a_from_int_cr:1; +- int a_to_int_cr:1; +- int a_from_psr:1; +- int a_from_cpuid:1; +- int a_cover:1; +- int a_bsw:1; ++ unsigned int a_int:1; ++ unsigned int a_from_int_cr:1; ++ unsigned int a_to_int_cr:1; ++ unsigned int a_from_psr:1; ++ unsigned int a_from_cpuid:1; ++ unsigned int a_cover:1; ++ unsigned int a_bsw:1; + long reserved:57; + }; + }; +@@ -97,12 +97,12 @@ union vac { + union vdc { + unsigned long value; + struct { +- int d_vmsw:1; +- int d_extint:1; +- int d_ibr_dbr:1; +- int d_pmc:1; +- int d_to_pmd:1; +- int d_itm:1; ++ unsigned int d_vmsw:1; ++ unsigned int d_extint:1; ++ unsigned int d_ibr_dbr:1; ++ unsigned int d_pmc:1; ++ unsigned int d_to_pmd:1; ++ unsigned int d_itm:1; + long reserved:58; + }; + }; diff --git a/src/patches/suse-2.6.27.25/patches.fixes/ia64_uv_partition_id.diff b/src/patches/suse-2.6.27.25/patches.fixes/ia64_uv_partition_id.diff new file mode 100644 index 0000000000..030d68afbf --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/ia64_uv_partition_id.diff @@ -0,0 +1,35 @@ +From: Russ Anderson +Subject: Add partition id, coherence id, and region size to UV 
+References: bnc#442455 + +Add partition id, coherence id, and region size to UV. + +The SGI xp drivers (drivers/misc/sgi-xp) are used on both +sn (Itanium) and uv (Tukwilla). Using the same names +(sn_partition_id, sn_coherency_id, sn_region_size) +simplifies the driver code. + + +Signed-off-by: Russ Anderson +Acked-by: Bernhard Walle + +--- + + arch/ia64/uv/kernel/setup.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/arch/ia64/uv/kernel/setup.c ++++ b/arch/ia64/uv/kernel/setup.c +@@ -19,6 +19,12 @@ EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info) + + #ifdef CONFIG_IA64_SGI_UV + int sn_prom_type; ++long sn_partition_id; ++EXPORT_SYMBOL(sn_partition_id); ++long sn_coherency_id; ++EXPORT_SYMBOL_GPL(sn_coherency_id); ++long sn_region_size; ++EXPORT_SYMBOL(sn_region_size); + #endif + + struct redir_addr { diff --git a/src/patches/suse-2.6.27.25/patches.fixes/ia64_uv_watchlist.diff b/src/patches/suse-2.6.27.25/patches.fixes/ia64_uv_watchlist.diff new file mode 100644 index 0000000000..597622e9c8 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/ia64_uv_watchlist.diff @@ -0,0 +1,78 @@ +From: Bernhard Walle +Subject: Add UV watchlist support +References: bnc#442455 + +Add UV watchlist support. + +This is used by SGI xp drivers (drivers/misc/sgi-xp). 
+ +Signed-off-by: Russ Anderson + +--- + + arch/ia64/include/asm/sn/sn_sal.h | 45 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 45 insertions(+) + +Index: linux/arch/ia64/include/asm/sn/sn_sal.h +=================================================================== +--- linux.orig/arch/ia64/include/asm/sn/sn_sal.h 2008-11-05 09:21:48.690243174 -0600 ++++ linux/arch/ia64/include/asm/sn/sn_sal.h 2008-11-05 09:22:01.847928152 -0600 +@@ -90,6 +90,8 @@ + #define SN_SAL_SET_CPU_NUMBER 0x02000068 + + #define SN_SAL_KERNEL_LAUNCH_EVENT 0x02000069 ++#define SN_SAL_WATCHLIST_ALLOC 0x02000070 ++#define SN_SAL_WATCHLIST_FREE 0x02000071 + + /* + * Service-specific constants +@@ -1183,6 +1185,49 @@ ia64_sn_kernel_launch_event(void) + { + struct ia64_sal_retval rv; + SAL_CALL_NOLOCK(rv, SN_SAL_KERNEL_LAUNCH_EVENT, 0, 0, 0, 0, 0, 0, 0); ++ return rv.status; ++} ++ ++union sn_watchlist_u { ++ u64 val; ++ struct { ++ u64 blade : 16, ++ size : 32, ++ filler : 16; ++ }; ++}; ++ ++static inline int ++sn_mq_watchlist_alloc(int blade, void *mq, unsigned int mq_size, ++ unsigned long *intr_mmr_offset) ++{ ++ struct ia64_sal_retval rv; ++ unsigned long addr; ++ union sn_watchlist_u size_blade; ++ int watchlist; ++ ++ addr = (unsigned long)mq; ++ size_blade.size = mq_size; ++ size_blade.blade = blade; ++ ++ /* ++ * bios returns watchlist number or negative error number. 
++ */ ++ ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_ALLOC, addr, ++ size_blade.val, (u64)intr_mmr_offset, ++ (u64)&watchlist, 0, 0, 0); ++ if (rv.status < 0) ++ return rv.status; ++ ++ return watchlist; ++} ++ ++static inline int ++sn_mq_watchlist_free(int blade, int watchlist_num) ++{ ++ struct ia64_sal_retval rv; ++ ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_FREE, blade, ++ watchlist_num, 0, 0, 0, 0, 0); + return rv.status; + } + #endif /* _ASM_IA64_SN_SN_SAL_H */ diff --git a/src/patches/suse-2.6.27.25/patches.fixes/iwlwifi-fix-iwl-3945_led_device_naming.diff b/src/patches/suse-2.6.27.25/patches.fixes/iwlwifi-fix-iwl-3945_led_device_naming.diff new file mode 100644 index 0000000000..904d754d42 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/iwlwifi-fix-iwl-3945_led_device_naming.diff @@ -0,0 +1,57 @@ +From: Danny Kukawka +Subject: iwlwifi: another led naming fix + +Fixed led device naming for the iwlwifi (iwl-3945) driver. Due +to the documentation of the led subsystem/class the naming should +be "devicename:colour:function" while not applying sections +should be left blank. + +This should lead to e.g. "iwl-%s::RX" instead of "iwl-%s:RX". 
+ +Signed-off-by: Danny Kukawka +Acked-by: Reinette Chatre +-- + iwl-led.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-led.c b/drivers/net/wireless/iwlwifi/iwl-3945-led.c +index 4c63890..09f9350 100644 +--- a/drivers/net/wireless/iwlwifi/iwl-3945-led.c ++++ b/drivers/net/wireless/iwlwifi/iwl-3945-led.c +@@ -317,7 +317,7 @@ int iwl3945_led_register(struct iwl3945_priv *priv) + + trigger = ieee80211_get_radio_led_name(priv->hw); + snprintf(priv->led[IWL_LED_TRG_RADIO].name, +- sizeof(priv->led[IWL_LED_TRG_RADIO].name), "iwl-%s:radio", ++ sizeof(priv->led[IWL_LED_TRG_RADIO].name), "iwl-%s::radio", + wiphy_name(priv->hw->wiphy)); + + priv->led[IWL_LED_TRG_RADIO].led_on = iwl3945_led_on; +@@ -333,7 +333,7 @@ int iwl3945_led_register(struct iwl3945_priv *priv) + + trigger = ieee80211_get_assoc_led_name(priv->hw); + snprintf(priv->led[IWL_LED_TRG_ASSOC].name, +- sizeof(priv->led[IWL_LED_TRG_ASSOC].name), "iwl-%s:assoc", ++ sizeof(priv->led[IWL_LED_TRG_ASSOC].name), "iwl-%s::assoc", + wiphy_name(priv->hw->wiphy)); + + ret = iwl3945_led_register_led(priv, +@@ -350,7 +350,7 @@ int iwl3945_led_register(struct iwl3945_priv *priv) + + trigger = ieee80211_get_rx_led_name(priv->hw); + snprintf(priv->led[IWL_LED_TRG_RX].name, +- sizeof(priv->led[IWL_LED_TRG_RX].name), "iwl-%s:RX", ++ sizeof(priv->led[IWL_LED_TRG_RX].name), "iwl-%s::RX", + wiphy_name(priv->hw->wiphy)); + + ret = iwl3945_led_register_led(priv, +@@ -366,7 +366,7 @@ int iwl3945_led_register(struct iwl3945_priv *priv) + + trigger = ieee80211_get_tx_led_name(priv->hw); + snprintf(priv->led[IWL_LED_TRG_TX].name, +- sizeof(priv->led[IWL_LED_TRG_TX].name), "iwl-%s:TX", ++ sizeof(priv->led[IWL_LED_TRG_TX].name), "iwl-%s::TX", + wiphy_name(priv->hw->wiphy)); + + ret = iwl3945_led_register_led(priv, + diff --git a/src/patches/suse-2.6.27.25/patches.fixes/iwlwifi-fix-iwl-led_device_naming.diff 
b/src/patches/suse-2.6.27.25/patches.fixes/iwlwifi-fix-iwl-led_device_naming.diff new file mode 100644 index 0000000000..5d7d18d3d3 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/iwlwifi-fix-iwl-led_device_naming.diff @@ -0,0 +1,54 @@ +From: Danny Kukawka +Subject: iwlwifi: fix led naming + +Fixed led device naming for the iwl driver. Due to the +documentation of the led subsystem/class the naming should be +"devicename:colour:function" while not applying sections +should be left blank. + +This should lead to e.g. "iwl-phy0::RX" instead of "iwl-phy0:RX". + +Signed-off-by: Danny Kukawka +Acked-by: Reinette Chatre +-- + drivers/net/wireless/iwlwifi/iwl-led.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/wireless/iwlwifi/iwl-led.c ++++ b/drivers/net/wireless/iwlwifi/iwl-led.c +@@ -353,7 +353,7 @@ int iwl_leds_register(struct iwl_priv *p + + trigger = ieee80211_get_radio_led_name(priv->hw); + snprintf(priv->led[IWL_LED_TRG_RADIO].name, +- sizeof(priv->led[IWL_LED_TRG_RADIO].name), "iwl-%s:radio", ++ sizeof(priv->led[IWL_LED_TRG_RADIO].name), "iwl-%s::radio", + wiphy_name(priv->hw->wiphy)); + + priv->led[IWL_LED_TRG_RADIO].led_on = iwl4965_led_on_reg; +@@ -367,7 +367,7 @@ int iwl_leds_register(struct iwl_priv *p + + trigger = ieee80211_get_assoc_led_name(priv->hw); + snprintf(priv->led[IWL_LED_TRG_ASSOC].name, +- sizeof(priv->led[IWL_LED_TRG_ASSOC].name), "iwl-%s:assoc", ++ sizeof(priv->led[IWL_LED_TRG_ASSOC].name), "iwl-%s::assoc", + wiphy_name(priv->hw->wiphy)); + + ret = iwl_leds_register_led(priv, &priv->led[IWL_LED_TRG_ASSOC], +@@ -383,7 +383,7 @@ int iwl_leds_register(struct iwl_priv *p + + trigger = ieee80211_get_rx_led_name(priv->hw); + snprintf(priv->led[IWL_LED_TRG_RX].name, +- sizeof(priv->led[IWL_LED_TRG_RX].name), "iwl-%s:RX", ++ sizeof(priv->led[IWL_LED_TRG_RX].name), "iwl-%s::RX", + wiphy_name(priv->hw->wiphy)); + + ret = iwl_leds_register_led(priv, &priv->led[IWL_LED_TRG_RX], +@@ -398,7 +398,7 @@ int 
iwl_leds_register(struct iwl_priv *p + + trigger = ieee80211_get_tx_led_name(priv->hw); + snprintf(priv->led[IWL_LED_TRG_TX].name, +- sizeof(priv->led[IWL_LED_TRG_TX].name), "iwl-%s:TX", ++ sizeof(priv->led[IWL_LED_TRG_TX].name), "iwl-%s::TX", + wiphy_name(priv->hw->wiphy)); + + ret = iwl_leds_register_led(priv, &priv->led[IWL_LED_TRG_TX], diff --git a/src/patches/suse-2.6.27.25/patches.fixes/kdb-kdump.diff b/src/patches/suse-2.6.27.25/patches.fixes/kdb-kdump.diff new file mode 100644 index 0000000000..088f13a772 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/kdb-kdump.diff @@ -0,0 +1,60 @@ +From: Jay Lan +Subject: Fix CONFIG_KDB_KDUMP on xSeries +Patch-mainline: not yet +References: bnc#436454 + +This patch fixes a problem that the capture kernel crashes with various +backtraces after the machine has been crashed (both sysrq-trigger and panic()). +Machines were that problem could reproduced at SUSE were molitor.suse.de and +korner.suse.de. + +KDB was turned off in that scenarios. + +That patch succeeds in following scenarios: + + a) kdb=0 + modprobe crasher call_panic + + b) kdb=1/0 + echo c > /proc/sysrq-trigger + + b) kdb=1 + ESC KDB + kdb> kdump + +But it fails in: + + kdb=1 + modprobe crasher call_panic + +That has to be investigated. But I think that's unrelated to that patch, +and it's no regression. 
+ + +Signed-off-by: Jay Lan +Signed-off-by: Bernhard Walle + +--- + arch/x86/kdb/kdba_support.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/arch/x86/kdb/kdba_support.c ++++ b/arch/x86/kdb/kdba_support.c +@@ -35,8 +35,6 @@ void kdba_kdump_prepare(struct pt_regs * + if (regs == NULL) + regs = &r; + +- machine_crash_shutdown_begin(); +- + for (i = 1; i < NR_CPUS; ++i) { + if (!cpu_online(i)) + continue; +@@ -44,7 +42,7 @@ void kdba_kdump_prepare(struct pt_regs * + KDB_STATE_SET_CPU(KEXEC, i); + } + +- machine_crash_shutdown_end(regs); ++ machine_crash_shutdown(regs); + } + + extern void halt_current_cpu(struct pt_regs *); diff --git a/src/patches/suse-2.6.27.25/patches.fixes/kdb-oops-panic.diff b/src/patches/suse-2.6.27.25/patches.fixes/kdb-oops-panic.diff new file mode 100644 index 0000000000..734acfcd5e --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/kdb-oops-panic.diff @@ -0,0 +1,65 @@ +From: Bernhard Walle +Subject: [PATCH] Fix NULL pointer dereference when regs == NULL +References: bnc#439007 + +This patch fixes following problem: + +When panic() in user context, for example by + + # modprobe crasher call_panic + +then KDB crashed in kdba_getpc() once because regs was not checked for being +NULL: + + Entering kdb (current=0xffff880036c747c0, pid 4420) on processor 1 Oops: + BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 + IP: [] kdba_getpc+0x0/0x8 + PGD 379f4067 PUD 39997067 PMD 0 + Oops: 0000 [1] SMP + last sysfs file: /sys/devices/pci0000:00/0000:00:1c.5/0000:06:00.0/irq + kdb: Debugger re-entered on cpu 1, new reason = 5 + Not executing a kdb command + No longjmp available for recovery + Cannot recover, allowing event to proceed + +Even if that has ieen fixed, then kdba_dumpregs() crashed because +the return value of kdba_getpc() was assumed to be non-NULL. + +This patch simply ports the error handling from its 32 bit counterpart +implementation. 
After applying that fix, the test mentioned above succeeds: + + Entering kdb (current=0xffff8800355fc480, pid 7564) on processor 1 Oops: + due to oops @ 0x0 + kdba_dumpregs: pt_regs not available, use bt* or pid to select a different task + [1]kdb> + + +Signed-off-by: Bernhard Walle + +--- + arch/x86/kdb/kdbasupport_64.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/arch/x86/kdb/kdbasupport_64.c ++++ b/arch/x86/kdb/kdbasupport_64.c +@@ -501,6 +501,11 @@ kdba_dumpregs(struct pt_regs *regs, + struct kdbregs *rlp; + kdb_machreg_t contents; + ++ if (!regs) { ++ kdb_printf("%s: pt_regs not available, use bt* or pid to select a different task\n", __FUNCTION__); ++ return KDB_BADREG; ++ } ++ + for (i=0, rlp=kdbreglist; i +Date: Mon, 20 Oct 2008 13:51:52 +0900 +Subject: [PATCH] x86, kdump: fix invalid access on i386 sparsemem +References: bnc#440525 + +Impact: fix kdump crash on 32-bit sparsemem kernels + +Since linux-2.6.27, kdump has failed on i386 sparsemem kernel. +1st-kernel gets a panic just before switching to 2nd-kernel. + +The cause is that a kernel accesses invalid mem_section by +page_to_pfn(image->swap_page) at machine_kexec(). +image->swap_page is allocated if kexec for hibernation, but +it is not allocated if kdump. So if kdump, a kernel should +not access the mem_section corresponding to image->swap_page. + +The attached patch fixes this invalid access. 
+ +Signed-off-by: Ken'ichi Ohmichi +Cc: kexec-ml +Cc: Andrew Morton +Signed-off-by: Ingo Molnar +Acked-by: Bernhard Walle + +diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c +index 0732adb..7a38574 100644 +--- a/arch/x86/kernel/machine_kexec_32.c ++++ b/arch/x86/kernel/machine_kexec_32.c +@@ -162,7 +162,10 @@ void machine_kexec(struct kimage *image) + page_list[VA_PTE_0] = (unsigned long)kexec_pte0; + page_list[PA_PTE_1] = __pa(kexec_pte1); + page_list[VA_PTE_1] = (unsigned long)kexec_pte1; +- page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); ++ ++ if (image->type == KEXEC_TYPE_DEFAULT) ++ page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) ++ << PAGE_SHIFT); + + /* The segment registers are funny things, they have both a + * visible and an invisible part. Whenever the visible part is diff --git a/src/patches/suse-2.6.27.25/patches.fixes/make-note_interrupt-fast.diff b/src/patches/suse-2.6.27.25/patches.fixes/make-note_interrupt-fast.diff new file mode 100644 index 0000000000..620057dd92 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/make-note_interrupt-fast.diff @@ -0,0 +1,160 @@ +From: Bernhard Walle +Subject: [PATCH] Fix performance regression on large IA64 systems +References: bnc #469589 + +This patch tries to address a performance regression discovered by SGI. + +Patch b60c1f6ffd88850079ae419aa933ab0eddbd5535 removes the call +to note_interrupt() in __do_IRQ(). Patch d85a60d85ea5b7c597508c1510c88e657773d378 +adds it again. Because it's needed for irqpoll. + +That patch now introduces a new parameter 'only_fixup' for note_interrupt(). +This parameter determines two cases: + + TRUE => The function should be only executed when irqfixup is set. + Either 'irqpoll' or 'irqfixup' directly set that. + + FALSE => Just the behaviour as note_interrupt() always had. 
+ +Now the patch converts all calls of note_interrupt() to only_fixup=FALSE, +except the call that has been removed by b60c1f6ffd88850079ae419aa933ab0eddbd5535. +So that call is always done, but the body is only executed when either +'irqpoll' or 'irqfixup' are specified. + +This patch is not meant for mainline inclusion in the first run! + + +Signed-off-by: Bernhard Walle + +--- + arch/arm/mach-ns9xxx/irq.c | 2 +- + arch/powerpc/platforms/cell/interrupt.c | 2 +- + include/linux/irq.h | 2 +- + kernel/irq/chip.c | 10 +++++----- + kernel/irq/handle.c | 4 ++-- + kernel/irq/spurious.c | 13 ++++++++++++- + 6 files changed, 22 insertions(+), 11 deletions(-) + +--- a/arch/arm/mach-ns9xxx/irq.c ++++ b/arch/arm/mach-ns9xxx/irq.c +@@ -86,7 +86,7 @@ static void handle_prio_irq(unsigned int + /* XXX: There is no direct way to access noirqdebug, so check + * unconditionally for spurious irqs... + * Maybe this function should go to kernel/irq/chip.c? */ +- note_interrupt(irq, desc, action_ret); ++ note_interrupt(irq, desc, action_ret, 0); + + spin_lock(&desc->lock); + desc->status &= ~IRQ_INPROGRESS; +--- a/arch/powerpc/platforms/cell/interrupt.c ++++ b/arch/powerpc/platforms/cell/interrupt.c +@@ -270,7 +270,7 @@ static void handle_iic_irq(unsigned int + spin_unlock(&desc->lock); + action_ret = handle_IRQ_event(irq, action); + if (!noirqdebug) +- note_interrupt(irq, desc, action_ret); ++ note_interrupt(irq, desc, action_ret, 0); + spin_lock(&desc->lock); + + } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); +--- a/include/linux/irq.h ++++ b/include/linux/irq.h +@@ -296,7 +296,7 @@ static inline void generic_handle_irq(un + + /* Handling of unhandled and spurious interrupts: */ + extern void note_interrupt(unsigned int irq, struct irq_desc *desc, +- int action_ret); ++ int action_ret, int only_fixup); + + /* Resending of interrupts :*/ + void check_irq_resend(struct irq_desc *desc, unsigned int irq); +--- a/kernel/irq/chip.c ++++ b/kernel/irq/chip.c +@@ 
-324,7 +324,7 @@ handle_simple_irq(unsigned int irq, stru + + action_ret = handle_IRQ_event(irq, action); + if (!noirqdebug) +- note_interrupt(irq, desc, action_ret); ++ note_interrupt(irq, desc, action_ret, 0); + + spin_lock(&desc->lock); + desc->status &= ~IRQ_INPROGRESS; +@@ -370,7 +370,7 @@ handle_level_irq(unsigned int irq, struc + + action_ret = handle_IRQ_event(irq, action); + if (!noirqdebug) +- note_interrupt(irq, desc, action_ret); ++ note_interrupt(irq, desc, action_ret, 0); + + spin_lock(&desc->lock); + desc->status &= ~IRQ_INPROGRESS; +@@ -423,7 +423,7 @@ handle_fasteoi_irq(unsigned int irq, str + + action_ret = handle_IRQ_event(irq, action); + if (!noirqdebug) +- note_interrupt(irq, desc, action_ret); ++ note_interrupt(irq, desc, action_ret, 0); + + spin_lock(&desc->lock); + desc->status &= ~IRQ_INPROGRESS; +@@ -503,7 +503,7 @@ handle_edge_irq(unsigned int irq, struct + spin_unlock(&desc->lock); + action_ret = handle_IRQ_event(irq, action); + if (!noirqdebug) +- note_interrupt(irq, desc, action_ret); ++ note_interrupt(irq, desc, action_ret, 0); + spin_lock(&desc->lock); + + } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); +@@ -532,7 +532,7 @@ handle_percpu_irq(unsigned int irq, stru + + action_ret = handle_IRQ_event(irq, desc->action); + if (!noirqdebug) +- note_interrupt(irq, desc, action_ret); ++ note_interrupt(irq, desc, action_ret, 0); + + if (desc->chip->eoi) + desc->chip->eoi(irq); +--- a/kernel/irq/handle.c ++++ b/kernel/irq/handle.c +@@ -187,7 +187,7 @@ unsigned int __do_IRQ(unsigned int irq) + if (likely(!(desc->status & IRQ_DISABLED))) { + action_ret = handle_IRQ_event(irq, desc->action); + if (!noirqdebug) +- note_interrupt(irq, desc, action_ret); ++ note_interrupt(irq, desc, action_ret, 1); + } + desc->chip->end(irq); + return 1; +@@ -241,7 +241,7 @@ unsigned int __do_IRQ(unsigned int irq) + + action_ret = handle_IRQ_event(irq, action); + if (!noirqdebug) +- note_interrupt(irq, desc, action_ret); ++ 
note_interrupt(irq, desc, action_ret, 0); + + spin_lock(&desc->lock); + if (likely(!(desc->status & IRQ_PENDING))) +--- a/kernel/irq/spurious.c ++++ b/kernel/irq/spurious.c +@@ -171,8 +171,19 @@ static inline int try_misrouted_irq(unsi + } + + void note_interrupt(unsigned int irq, struct irq_desc *desc, +- irqreturn_t action_ret) ++ irqreturn_t action_ret, int only_fixup) + { ++ /* ++ * The parameter "only_fixup" means that the function should be only ++ * executed if this parameter is set to 1 and the function should ++ * not be executed if the parameter is 0. ++ * ++ * We need that because irqfixup is static to the function but ++ * this function is called from kernel/irq/handle.c. ++ */ ++ if (only_fixup && irqfixup == 0) ++ return; ++ + if (unlikely(action_ret != IRQ_HANDLED)) { + /* + * If we are seeing only the odd spurious IRQ caused by diff --git a/src/patches/suse-2.6.27.25/patches.fixes/nfs-acl-caching.diff b/src/patches/suse-2.6.27.25/patches.fixes/nfs-acl-caching.diff new file mode 100644 index 0000000000..3121700291 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/nfs-acl-caching.diff @@ -0,0 +1,46 @@ +From: Andreas Gruenbacher +Subject: "No acl" entry put in client-side acl cache instead of "not cached" +References: 171059 + +When the acl of a file is not cached and only the default acl of that +file is requested, a NULL "no acl" entry is put in the client-side acl +cache of nfs instead of ERR_PTR(-EAGAIN) "not cached". 
+ +Signed-off-by: Andreas Gruenbacher + +Index: linux-2.6.16/fs/nfs/nfs3acl.c +=================================================================== +--- linux-2.6.16.orig/fs/nfs/nfs3acl.c ++++ linux-2.6.16/fs/nfs/nfs3acl.c +@@ -172,8 +172,10 @@ static void nfs3_cache_acls(struct inode + inode->i_ino, acl, dfacl); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); +- nfsi->acl_access = posix_acl_dup(acl); +- nfsi->acl_default = posix_acl_dup(dfacl); ++ if (!IS_ERR(acl)) ++ nfsi->acl_access = posix_acl_dup(acl); ++ if (!IS_ERR(dfacl)) ++ nfsi->acl_default = posix_acl_dup(dfacl); + spin_unlock(&inode->i_lock); + } + +@@ -250,7 +252,9 @@ struct posix_acl *nfs3_proc_getacl(struc + res.acl_access = NULL; + } + } +- nfs3_cache_acls(inode, res.acl_access, res.acl_default); ++ nfs3_cache_acls(inode, ++ (res.mask & NFS_ACL) ? res.acl_access : ERR_PTR(-EINVAL), ++ (res.mask & NFS_DFACL) ? res.acl_default : ERR_PTR(-EINVAL)); + + switch(type) { + case ACL_TYPE_ACCESS: +@@ -321,6 +325,7 @@ static int nfs3_proc_setacls(struct inod + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); ++ nfs3_cache_acls(inode, acl, dfacl); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: diff --git a/src/patches/suse-2.6.27.25/patches.fixes/proc-scsi-scsi-fix.diff b/src/patches/suse-2.6.27.25/patches.fixes/proc-scsi-scsi-fix.diff new file mode 100644 index 0000000000..45b2e0928a --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/proc-scsi-scsi-fix.diff @@ -0,0 +1,110 @@ +From: Jeff Mahoney +Subject: [PATCH] scsi: iterate over devices individually for /proc/scsi/scsi +References: 263731 +Patch-mainline: Probably never, hch wants to kill /proc/scsi/scsi anyway. + + On systems with very large numbers (> 1600 or so) of SCSI devices, + cat /proc/scsi/scsi ends up failing with -ENOMEM. 
This is due to + the show routine simply iterating over all of the devices with + bus_for_each_dev(), and trying to dump all of them into the buffer + at the same time. On my test system (using scsi_debug with 4064 devices), + the output ends up being ~ 632k, far more than kmalloc will typically allow. + + This patch defines its own seq_file opreations to iterate over the scsi + devices.The result is that each show() operation only dumps ~ 180 bytes + into the buffer at a time so we don't run out of memory. + + If the "Attached devices" header isn't required, we can dump the + sfile->private bit completely. + +Signed-off-by: Jeff Mahoney + +--- + + drivers/scsi/scsi_proc.c | 58 ++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 52 insertions(+), 6 deletions(-) + +--- a/drivers/scsi/scsi_proc.c ++++ b/drivers/scsi/scsi_proc.c +@@ -389,13 +389,59 @@ static ssize_t proc_scsi_write(struct fi + * @s: output goes here + * @p: not used + */ +-static int proc_scsi_show(struct seq_file *s, void *p) ++static int always_match(struct device *dev, void *data) + { +- seq_printf(s, "Attached devices:\n"); +- bus_for_each_dev(&scsi_bus_type, NULL, s, proc_print_scsidevice); +- return 0; ++ return 1; + } + ++static inline struct device *next_scsi_device(struct device *start) ++{ ++ struct device *next = bus_find_device(&scsi_bus_type, start, NULL, ++ always_match); ++ put_device(start); ++ return next; ++} ++ ++static void *scsi_seq_start(struct seq_file *sfile, loff_t *pos) ++{ ++ struct device *dev = NULL; ++ loff_t n = *pos; ++ ++ while ((dev = next_scsi_device(dev))) { ++ if (!n--) ++ break; ++ sfile->private++; ++ } ++ return dev; ++} ++ ++static void *scsi_seq_next(struct seq_file *sfile, void *v, loff_t *pos) ++{ ++ (*pos)++; ++ sfile->private++; ++ return next_scsi_device(v); ++} ++ ++static void scsi_seq_stop(struct seq_file *sfile, void *v) ++{ ++ put_device(v); ++} ++ ++static int scsi_seq_show(struct seq_file *sfile, void *dev) ++{ ++ if (!sfile->private) 
++ seq_puts(sfile, "Attached devices:\n"); ++ ++ return proc_print_scsidevice(dev, sfile); ++} ++ ++static struct seq_operations scsi_seq_ops = { ++ .start = scsi_seq_start, ++ .next = scsi_seq_next, ++ .stop = scsi_seq_stop, ++ .show = scsi_seq_show ++}; ++ + /** + * proc_scsi_open - glue function + * @inode: not used +@@ -409,7 +455,7 @@ static int proc_scsi_open(struct inode * + * We don't really need this for the write case but it doesn't + * harm either. + */ +- return single_open(file, proc_scsi_show, NULL); ++ return seq_open(file, &scsi_seq_ops); + } + + static const struct file_operations proc_scsi_operations = { +@@ -418,7 +464,7 @@ static const struct file_operations proc + .read = seq_read, + .write = proc_scsi_write, + .llseek = seq_lseek, +- .release = single_release, ++ .release = seq_release, + }; + + /** diff --git a/src/patches/suse-2.6.27.25/patches.fixes/rt2x00-fix-led_device_naming.diff b/src/patches/suse-2.6.27.25/patches.fixes/rt2x00-fix-led_device_naming.diff new file mode 100644 index 0000000000..cfabe43ff1 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/rt2x00-fix-led_device_naming.diff @@ -0,0 +1,44 @@ +From: Danny Kukawka +Subject: rt2x00: fix led naming + +Fixed led device naming for the rt2x00 driver. Due to the +documentation of the led subsystem/class the naming should be +"devicename:colour:function" while not applying sections +should be left blank. + +This should lead to e.g. "%s::radio" instead of "%s:radio". 
+ +Signed-off-by: Danny Kukawka +-- + drivers/net/wireless/rt2x00/rt2x00leds.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/net/wireless/rt2x00/rt2x00leds.c ++++ b/drivers/net/wireless/rt2x00/rt2x00leds.c +@@ -149,7 +149,7 @@ void rt2x00leds_register(struct rt2x00_d + rt2x00dev->ops->name, wiphy_name(rt2x00dev->hw->wiphy)); + + if (rt2x00dev->led_radio.flags & LED_INITIALIZED) { +- snprintf(name, sizeof(name), "%s:radio", dev_name); ++ snprintf(name, sizeof(name), "%s::radio", dev_name); + + retval = rt2x00leds_register_led(rt2x00dev, + &rt2x00dev->led_radio, +@@ -159,7 +159,7 @@ void rt2x00leds_register(struct rt2x00_d + } + + if (rt2x00dev->led_assoc.flags & LED_INITIALIZED) { +- snprintf(name, sizeof(name), "%s:assoc", dev_name); ++ snprintf(name, sizeof(name), "%s::assoc", dev_name); + + retval = rt2x00leds_register_led(rt2x00dev, + &rt2x00dev->led_assoc, +@@ -169,7 +169,7 @@ void rt2x00leds_register(struct rt2x00_d + } + + if (rt2x00dev->led_qual.flags & LED_INITIALIZED) { +- snprintf(name, sizeof(name), "%s:quality", dev_name); ++ snprintf(name, sizeof(name), "%s::quality", dev_name); + + retval = rt2x00leds_register_led(rt2x00dev, + &rt2x00dev->led_qual, diff --git a/src/patches/suse-2.6.27.25/patches.fixes/sd_liberal_28_sense_invalid.diff b/src/patches/suse-2.6.27.25/patches.fixes/sd_liberal_28_sense_invalid.diff new file mode 100644 index 0000000000..00ec3e9f19 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/sd_liberal_28_sense_invalid.diff @@ -0,0 +1,27 @@ +From: Oliver Neukum +Subject: fix medium presence misdetection in usb storage device +References: bnc#362850 + +From reading the SCSI spec it seems that having the valid bit 0 (0x70 +checked in scsi_sense_valid) should does not invalidate the ASC or ASQ. +[See page 37 of spc4r02.pdf]. It should only invalidate the INFORMATION +field. Therefore remove the sense_valid check from the USB quirk. 
+ +Signed-off-by: Brandon Philips + +--- + drivers/scsi/sd.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -1208,8 +1208,7 @@ sd_spinup_disk(struct scsi_disk *sdkp) + * Yes, this sense key/ASC combination shouldn't + * occur here. It's characteristic of these devices. + */ +- } else if (sense_valid && +- sshdr.sense_key == UNIT_ATTENTION && ++ } else if (sshdr.sense_key == UNIT_ATTENTION && + sshdr.asc == 0x28) { + if (!spintime) { + spintime_expire = jiffies + 5 * HZ; diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_memprotect.diff b/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_memprotect.diff new file mode 100644 index 0000000000..2ded18f032 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_memprotect.diff @@ -0,0 +1,69 @@ +From: Russ Anderson +Subject: Add UV bios call to change memory protections. +References: bnc#442455 + + +Add UV bios call to change memory protections. 
+ +Signed-off-by: Russ Anderson +Acked-by: Bernhard Walle + +--- + arch/x86/kernel/bios_uv.c | 8 ++++++++ + include/asm-x86/uv/bios.h | 10 +++++++++- + 2 files changed, 17 insertions(+), 1 deletion(-) + +Index: linux/arch/x86/kernel/bios_uv.c +=================================================================== +--- linux.orig/arch/x86/kernel/bios_uv.c 2008-11-05 11:12:16.101949483 -0600 ++++ linux/arch/x86/kernel/bios_uv.c 2008-11-05 11:13:15.289411601 -0600 +@@ -134,6 +134,14 @@ uv_bios_mq_watchlist_free(int blade, int + } + EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free); + ++s64 ++uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms) ++{ ++ return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len, ++ perms, 0, 0); ++} ++EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); ++ + s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) + { + return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, +Index: linux/include/asm-x86/uv/bios.h +=================================================================== +--- linux.orig/include/asm-x86/uv/bios.h 2008-11-05 11:12:16.117951501 -0600 ++++ linux/include/asm-x86/uv/bios.h 2008-11-05 11:13:15.301413114 -0600 +@@ -34,7 +34,8 @@ enum uv_bios_cmd { + UV_BIOS_GET_SN_INFO, + UV_BIOS_FREQ_BASE, + UV_BIOS_WATCHLIST_ALLOC, +- UV_BIOS_WATCHLIST_FREE ++ UV_BIOS_WATCHLIST_FREE, ++ UV_BIOS_MEMPROTECT + }; + + /* +@@ -82,6 +83,12 @@ union uv_watchlist_u { + }; + }; + ++enum uv_memprotect { ++ UV_MEMPROT_RESTRICT_ACCESS, ++ UV_MEMPROT_ALLOW_AMO, ++ UV_MEMPROT_ALLOW_RW ++}; ++ + /* + * bios calls have 6 parameters + */ +@@ -94,6 +101,7 @@ extern s64 uv_bios_freq_base(u64, u64 *) + extern int uv_bios_mq_watchlist_alloc(int, void *, unsigned int, + unsigned long *); + extern int uv_bios_mq_watchlist_free(int, int); ++extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); + + extern void uv_bios_init(void); + diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_partition.diff 
b/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_partition.diff new file mode 100644 index 0000000000..6d42c73340 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_partition.diff @@ -0,0 +1,155 @@ +From: Russ Anderson +Subject: x86: Add UV partition call +References: bnc#442455 + +Add a bios call to return partitioning related info. + +Signed-off-by: Russ Anderson +Acked-by: Bernhard Walle + +--- + arch/x86/kernel/bios_uv.c | 44 ++++++++++++++++++++++++++++++++++----- + arch/x86/kernel/genx2apic_uv_x.c | 14 +++++++----- + include/asm-x86/uv/bios.h | 22 ++++++++++++++++--- + 3 files changed, 66 insertions(+), 14 deletions(-) + +--- a/arch/x86/kernel/bios_uv.c ++++ b/arch/x86/kernel/bios_uv.c +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + + struct uv_systab uv_systab; + +@@ -65,14 +66,47 @@ s64 uv_bios_call_reentrant(enum uv_bios_ + return ret; + } + +-long +-x86_bios_freq_base(unsigned long clock_type, unsigned long *ticks_per_second, +- unsigned long *drift_info) ++ ++long sn_partition_id; ++EXPORT_SYMBOL_GPL(sn_partition_id); ++long uv_coherency_id; ++EXPORT_SYMBOL_GPL(uv_coherency_id); ++long uv_region_size; ++EXPORT_SYMBOL_GPL(uv_region_size); ++int uv_type; ++ ++ ++s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, ++ long *region) ++{ ++ s64 ret; ++ u64 v0, v1; ++ union partition_info_u part; ++ ++ ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc, ++ (u64)(&v0), (u64)(&v1), 0, 0); ++ if (ret != BIOS_STATUS_SUCCESS) ++ return ret; ++ ++ part.val = v0; ++ if (uvtype) ++ *uvtype = part.hub_version; ++ if (partid) ++ *partid = part.partition_id; ++ if (coher) ++ *coher = part.coherence_id; ++ if (region) ++ *region = part.region_size; ++ return ret; ++} ++ ++ ++s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) + { + return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, +- (u64)ticks_per_second, 0, 0, 0); ++ (u64)ticks_per_second, 0, 0, 0); + } +-EXPORT_SYMBOL_GPL(x86_bios_freq_base); 
++EXPORT_SYMBOL_GPL(uv_bios_freq_base); + + + #ifdef CONFIG_EFI +--- a/arch/x86/kernel/genx2apic_uv_x.c ++++ b/arch/x86/kernel/genx2apic_uv_x.c +@@ -353,12 +353,12 @@ static __init void map_mmioh_high(int ma + + static __init void uv_rtc_init(void) + { +- long status, ticks_per_sec, drift; ++ long status; ++ u64 ticks_per_sec; + +- status = +- x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, +- &drift); +- if (status != 0 || ticks_per_sec < 100000) { ++ status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, ++ &ticks_per_sec); ++ if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) { + printk(KERN_WARNING + "unable to determine platform RTC clock frequency, " + "guessing.\n"); +@@ -523,6 +523,8 @@ void __init uv_system_init(void) + ~((1 << n_val) - 1)) << m_val; + + uv_bios_init(); ++ uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, ++ &uv_coherency_id, &uv_region_size); + uv_rtc_init(); + + for_each_present_cpu(cpu) { +@@ -544,7 +546,7 @@ void __init uv_system_init(void) + uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; + uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; + uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; +- uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ ++ uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id; + uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; + uv_node_to_blade[nid] = blade; + uv_cpu_to_blade[cpu] = blade; +--- a/include/asm-x86/uv/bios.h ++++ b/include/asm-x86/uv/bios.h +@@ -61,6 +61,16 @@ enum { + BIOS_FREQ_BASE_REALTIME_CLOCK = 2 + }; + ++union partition_info_u { ++ u64 val; ++ struct { ++ u64 hub_version : 8, ++ partition_id : 16, ++ coherence_id : 16, ++ region_size : 24; ++ }; ++}; ++ + /* + * bios calls have 6 parameters + */ +@@ -68,10 +78,16 @@ extern s64 uv_bios_call(enum uv_bios_cmd + extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64); + extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); + 
++extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); ++extern s64 uv_bios_freq_base(u64, u64 *); ++ + extern void uv_bios_init(void); + +-extern long +-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, +- unsigned long *drift_info); ++extern int uv_type; ++extern long sn_partition_id; ++extern long uv_coherency_id; ++extern long uv_region_size; ++#define partition_coherence_id() (uv_coherency_id) ++ + + #endif /* _ASM_X86_BIOS_H */ diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_reserve_page.diff b/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_reserve_page.diff new file mode 100644 index 0000000000..dbbb7b530b --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_reserve_page.diff @@ -0,0 +1,66 @@ +From: Russ Anderson +Subject: Add UV bios call to get the address of the reserved page. +References: bnc#442455 + +Add UV bios call to get the address of the reserved page. + + +Signed-off-by: Russ Anderson +Acked-by: Bernhard Walle + +--- + arch/x86/kernel/bios_uv.c | 11 +++++++++++ + include/asm-x86/uv/bios.h | 5 ++++- + 2 files changed, 15 insertions(+), 1 deletion(-) + +Index: linux/arch/x86/kernel/bios_uv.c +=================================================================== +--- linux.orig/arch/x86/kernel/bios_uv.c 2008-11-05 11:13:15.289411601 -0600 ++++ linux/arch/x86/kernel/bios_uv.c 2008-11-05 11:14:11.428488248 -0600 +@@ -142,6 +142,17 @@ uv_bios_change_memprotect(u64 paddr, u64 + } + EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); + ++s64 ++uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) ++{ ++ s64 ret; ++ ++ ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, ++ (u64)addr, buf, (u64)len, 0); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa); ++ + s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) + { + return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, +Index: linux/include/asm-x86/uv/bios.h 
+=================================================================== +--- linux.orig/include/asm-x86/uv/bios.h 2008-11-05 11:13:15.301413114 -0600 ++++ linux/include/asm-x86/uv/bios.h 2008-11-05 11:14:11.436489257 -0600 +@@ -35,13 +35,15 @@ enum uv_bios_cmd { + UV_BIOS_FREQ_BASE, + UV_BIOS_WATCHLIST_ALLOC, + UV_BIOS_WATCHLIST_FREE, +- UV_BIOS_MEMPROTECT ++ UV_BIOS_MEMPROTECT, ++ UV_BIOS_GET_PARTITION_ADDR + }; + + /* + * Status values returned from a BIOS call. + */ + enum { ++ BIOS_STATUS_MORE_PASSES = 1, + BIOS_STATUS_SUCCESS = 0, + BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, + BIOS_STATUS_EINVAL = -EINVAL, +@@ -102,6 +104,7 @@ extern int uv_bios_mq_watchlist_alloc(in + unsigned long *); + extern int uv_bios_mq_watchlist_free(int, int); + extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); ++extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); + + extern void uv_bios_init(void); + diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_watchlist.diff b/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_watchlist.diff new file mode 100644 index 0000000000..c08a62ce1b --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_call_watchlist.diff @@ -0,0 +1,100 @@ +From: Russ Anderson +Subject: Add UV bios calls to allocate and free watchlists. +References: bnc#442455 + +Add UV bios calls to allocate and free watchlists. 
+ + +Signed-off-by: Russ Anderson +Acked-by: Bernhard Walle + +--- + arch/x86/kernel/bios_uv.c | 33 +++++++++++++++++++++++++++++++++ + include/asm-x86/uv/bios.h | 17 ++++++++++++++++- + 2 files changed, 49 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/bios_uv.c ++++ b/arch/x86/kernel/bios_uv.c +@@ -100,6 +100,39 @@ s64 uv_bios_get_sn_info(int fc, int *uvt + return ret; + } + ++int ++uv_bios_mq_watchlist_alloc(int blade, void *mq, unsigned int mq_size, ++ unsigned long *intr_mmr_offset) ++{ ++ union uv_watchlist_u size_blade; ++ unsigned long addr; ++ u64 watchlist; ++ s64 ret; ++ ++ addr = (unsigned long)mq; ++ size_blade.size = mq_size; ++ size_blade.blade = blade; ++ ++ /* ++ * bios returns watchlist number or negative error number. ++ */ ++ ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, ++ size_blade.val, (u64)intr_mmr_offset, ++ (u64)&watchlist, 0); ++ if (ret < BIOS_STATUS_SUCCESS) ++ return ret; ++ ++ return watchlist; ++} ++EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc); ++ ++int ++uv_bios_mq_watchlist_free(int blade, int watchlist_num) ++{ ++ return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE, ++ blade, watchlist_num, 0, 0, 0); ++} ++EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free); + + s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) + { +--- a/include/asm-x86/uv/bios.h ++++ b/include/asm-x86/uv/bios.h +@@ -32,7 +32,9 @@ + enum uv_bios_cmd { + UV_BIOS_COMMON, + UV_BIOS_GET_SN_INFO, +- UV_BIOS_FREQ_BASE ++ UV_BIOS_FREQ_BASE, ++ UV_BIOS_WATCHLIST_ALLOC, ++ UV_BIOS_WATCHLIST_FREE + }; + + /* +@@ -71,6 +73,15 @@ union partition_info_u { + }; + }; + ++union uv_watchlist_u { ++ u64 val; ++ struct { ++ u64 blade : 16, ++ size : 32, ++ filler : 16; ++ }; ++}; ++ + /* + * bios calls have 6 parameters + */ +@@ -80,9 +91,13 @@ extern s64 uv_bios_call_reentrant(enum u + + extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); + extern s64 uv_bios_freq_base(u64, u64 *); ++extern int uv_bios_mq_watchlist_alloc(int, void 
*, unsigned int, ++ unsigned long *); ++extern int uv_bios_mq_watchlist_free(int, int); + + extern void uv_bios_init(void); + ++extern unsigned long sn_rtc_cycles_per_second; + extern int uv_type; + extern long sn_partition_id; + extern long sn_coherency_id; diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_common.diff b/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_common.diff new file mode 100644 index 0000000000..5864106033 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/uv-bios_common.diff @@ -0,0 +1,280 @@ +From: Russ Anderson +Subject: x86: Add UV bios call infrastructure +References: bnc#442455 + +Add the EFI callback function and associated wrapper code. +Initialize SAL system table entry info at boot time. + +Signed-off-by: Russ Anderson +Signed-off-by: Paul Jackson +Acked-by: Bernhard Walle + +--- + arch/x86/kernel/bios_uv.c | 101 ++++++++++++++++++++++++++++++--------- + arch/x86/kernel/genx2apic_uv_x.c | 1 + include/asm-x86/efi.h | 14 +++++ + include/asm-x86/uv/bios.h | 73 +++++++++++++++------------- + 4 files changed, 136 insertions(+), 53 deletions(-) + +--- a/arch/x86/kernel/bios_uv.c ++++ b/arch/x86/kernel/bios_uv.c +@@ -1,8 +1,6 @@ + /* + * BIOS run time interface routines. + * +- * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. +- * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or +@@ -16,33 +14,94 @@ + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 
++ * Copyright (c) Russ Anderson + */ + ++#include ++#include ++#include + #include + +-const char * +-x86_bios_strerror(long status) ++struct uv_systab uv_systab; ++ ++s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) + { +- const char *str; +- switch (status) { +- case 0: str = "Call completed without error"; break; +- case -1: str = "Not implemented"; break; +- case -2: str = "Invalid argument"; break; +- case -3: str = "Call completed with error"; break; +- default: str = "Unknown BIOS status code"; break; +- } +- return str; ++ struct uv_systab *tab = &uv_systab; ++ ++ if (!tab->function) ++ /* ++ * BIOS does not support UV systab ++ */ ++ return BIOS_STATUS_UNIMPLEMENTED; ++ ++ return efi_call6((void *)__va(tab->function), ++ (u64)which, a1, a2, a3, a4, a5); + } + +-long +-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, +- unsigned long *drift_info) ++s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, ++ u64 a4, u64 a5) + { +- struct uv_bios_retval isrv; ++ unsigned long bios_flags; ++ s64 ret; ++ ++ local_irq_save(bios_flags); ++ ret = uv_bios_call(which, a1, a2, a3, a4, a5); ++ local_irq_restore(bios_flags); ++ ++ return ret; ++} ++ ++s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, ++ u64 a4, u64 a5) ++{ ++ s64 ret; ++ ++ preempt_disable(); ++ ret = uv_bios_call(which, a1, a2, a3, a4, a5); ++ preempt_enable(); + +- BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); +- *ticks_per_second = isrv.v0; +- *drift_info = isrv.v1; +- return isrv.status; ++ return ret; ++} ++ ++long ++x86_bios_freq_base(unsigned long clock_type, unsigned long *ticks_per_second, ++ unsigned long *drift_info) ++{ ++ return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, ++ (u64)ticks_per_second, 0, 0, 0); + } + EXPORT_SYMBOL_GPL(x86_bios_freq_base); ++ ++ ++#ifdef CONFIG_EFI ++void uv_bios_init(void) ++{ ++ struct uv_systab *tab; ++ ++ if ((efi.uv_systab == 
EFI_INVALID_TABLE_ADDR) || ++ (efi.uv_systab == (unsigned long)NULL)) { ++ printk(KERN_CRIT "No EFI UV System Table.\n"); ++ uv_systab.function = (unsigned long)NULL; ++ return; ++ } ++ ++ tab = (struct uv_systab *)ioremap(efi.uv_systab, ++ sizeof(struct uv_systab)); ++ if (strncmp(tab->signature, "UVST", 4) != 0) ++ printk(KERN_ERR "bad signature in UV system table!"); ++ ++ /* ++ * Copy table to permanent spot for later use. ++ */ ++ memcpy(&uv_systab, tab, sizeof(struct uv_systab)); ++ iounmap(tab); ++ ++ printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision); ++} ++#else /* !CONFIG_EFI */ ++ ++void uv_bios_init(void) { } ++#endif ++ +--- a/arch/x86/kernel/genx2apic_uv_x.c ++++ b/arch/x86/kernel/genx2apic_uv_x.c +@@ -522,6 +522,7 @@ void __init uv_system_init(void) + gnode_upper = (((unsigned long)node_id.s.node_id) & + ~((1 << n_val) - 1)) << m_val; + ++ uv_bios_init(); + uv_rtc_init(); + + for_each_present_cpu(cpu) { +--- a/include/asm-x86/efi.h ++++ b/include/asm-x86/efi.h +@@ -49,6 +49,20 @@ extern u64 efi_call5(void *fp, u64 arg1, + extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, + u64 arg4, u64 arg5, u64 arg6); + ++ ++#ifndef CONFIG_EFI ++/* ++ * IF EFI is not configured, have the EFI calls return -ENOSYS. ++ */ ++#define efi_call0(_f) (-ENOSYS) ++#define efi_call1(_f, _a1) (-ENOSYS) ++#define efi_call2(_f, _a1, _a2) (-ENOSYS) ++#define efi_call3(_f, _a1, _a2, _a3) (-ENOSYS) ++#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS) ++#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) ++#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) ++#endif /* CONFIG_EFI */ ++ + #define efi_call_phys0(f) \ + efi_call0((void *)(f)) + #define efi_call_phys1(f, a1) \ +--- a/include/asm-x86/uv/bios.h ++++ b/include/asm-x86/uv/bios.h +@@ -2,9 +2,7 @@ + #define _ASM_X86_BIOS_H + + /* +- * BIOS layer definitions. +- * +- * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. ++ * UV BIOS layer definitions. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -19,50 +17,61 @@ + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. ++ * Copyright (c) Russ Anderson + */ + + #include + +-#define BIOS_FREQ_BASE 0x01000001 ++/* ++ * Values for the BIOS calls. It is passed as the first * argument in the ++ * BIOS call. Passing any other value in the first argument will result ++ * in a BIOS_STATUS_UNIMPLEMENTED return status. ++ */ ++enum uv_bios_cmd { ++ UV_BIOS_COMMON, ++ UV_BIOS_GET_SN_INFO, ++ UV_BIOS_FREQ_BASE ++}; + ++/* ++ * Status values returned from a BIOS call. ++ */ + enum { +- BIOS_FREQ_BASE_PLATFORM = 0, +- BIOS_FREQ_BASE_INTERVAL_TIMER = 1, +- BIOS_FREQ_BASE_REALTIME_CLOCK = 2 ++ BIOS_STATUS_SUCCESS = 0, ++ BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, ++ BIOS_STATUS_EINVAL = -EINVAL, ++ BIOS_STATUS_UNAVAIL = -EBUSY + }; + +-# define BIOS_CALL(result, a0, a1, a2, a3, a4, a5, a6, a7) \ +- do { \ +- /* XXX - the real call goes here */ \ +- result.status = BIOS_STATUS_UNIMPLEMENTED; \ +- isrv.v0 = 0; \ +- isrv.v1 = 0; \ +- } while (0) ++/* ++ * The UV system table describes specific firmware ++ * capabilities available to the Linux kernel at runtime. 
++ */ ++struct uv_systab { ++ char signature[4]; /* must be "UVST" */ ++ u32 revision; /* distinguish different firmware revs */ ++ u64 function; /* BIOS runtime callback function ptr */ ++}; + + enum { +- BIOS_STATUS_SUCCESS = 0, +- BIOS_STATUS_UNIMPLEMENTED = -1, +- BIOS_STATUS_EINVAL = -2, +- BIOS_STATUS_ERROR = -3 ++ BIOS_FREQ_BASE_PLATFORM = 0, ++ BIOS_FREQ_BASE_INTERVAL_TIMER = 1, ++ BIOS_FREQ_BASE_REALTIME_CLOCK = 2 + }; + +-struct uv_bios_retval { +- /* +- * A zero status value indicates call completed without error. +- * A negative status value indicates reason of call failure. +- * A positive status value indicates success but an +- * informational value should be printed (e.g., "reboot for +- * change to take effect"). +- */ +- s64 status; +- u64 v0; +- u64 v1; +- u64 v2; +-}; ++/* ++ * bios calls have 6 parameters ++ */ ++extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64); ++extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64); ++extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); ++ ++extern void uv_bios_init(void); + + extern long + x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, + unsigned long *drift_info); +-extern const char *x86_bios_strerror(long status); + + #endif /* _ASM_X86_BIOS_H */ diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv-efi_bios.diff b/src/patches/suse-2.6.27.25/patches.fixes/uv-efi_bios.diff new file mode 100644 index 0000000000..1123b37698 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/uv-efi_bios.diff @@ -0,0 +1,53 @@ +From: Russ Anderson +Subject: x86: Add UV EFI table entry +References: bnc#442455 + +Add an EFI table entry for SGI UV system. +Look for the entry in the EFI tables. 
+ +Signed-off-by: Russ Anderson +Signed-off-by: Paul Jackson +Acked-by: Bernhard Walle + +--- + arch/x86/kernel/efi.c | 4 ++++ + include/linux/efi.h | 4 ++++ + 2 files changed, 8 insertions(+) + +Index: linux/arch/x86/kernel/efi.c +=================================================================== +--- linux.orig/arch/x86/kernel/efi.c 2008-10-15 09:56:13.000000000 -0500 ++++ linux/arch/x86/kernel/efi.c 2008-10-15 09:56:23.000000000 -0500 +@@ -367,6 +367,10 @@ void __init efi_init(void) + efi.smbios = config_tables[i].table; + printk(" SMBIOS=0x%lx ", config_tables[i].table); + } else if (!efi_guidcmp(config_tables[i].guid, ++ UV_SYSTEM_TABLE_GUID)) { ++ efi.uv_systab = config_tables[i].table; ++ printk(" UVsystab=0x%lx ", config_tables[i].table); ++ } else if (!efi_guidcmp(config_tables[i].guid, + HCDP_TABLE_GUID)) { + efi.hcdp = config_tables[i].table; + printk(" HCDP=0x%lx ", config_tables[i].table); +Index: linux/include/linux/efi.h +=================================================================== +--- linux.orig/include/linux/efi.h 2008-10-15 09:56:13.000000000 -0500 ++++ linux/include/linux/efi.h 2008-10-15 09:56:23.000000000 -0500 +@@ -208,6 +208,9 @@ typedef efi_status_t efi_set_virtual_add + #define EFI_GLOBAL_VARIABLE_GUID \ + EFI_GUID( 0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c ) + ++#define UV_SYSTEM_TABLE_GUID \ ++ EFI_GUID( 0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 ) ++ + typedef struct { + efi_guid_t guid; + unsigned long table; +@@ -255,6 +258,7 @@ extern struct efi { + unsigned long boot_info; /* boot info table */ + unsigned long hcdp; /* HCDP table */ + unsigned long uga; /* UGA table */ ++ unsigned long uv_systab; /* UV system table */ + efi_get_time_t *get_time; + efi_set_time_t *set_time; + efi_get_wakeup_time_t *get_wakeup_time; diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv-sn_region_size.diff b/src/patches/suse-2.6.27.25/patches.fixes/uv-sn_region_size.diff new 
file mode 100644 index 0000000000..c3aec1d8fe --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/uv-sn_region_size.diff @@ -0,0 +1,70 @@ +From: Russ Anderson +Subject: Use consistent names for region size and conherence id on x86 and ia64. +References: bnc#442455 + +The SGI xp drivers are used on both ia64 and x86. Using the same +names (sn_coherency_id, sn_region_size) simplies the driver code. + + +Signed-off-by: Russ Anderson +Acked-by: Bernhard Walle + +--- + + arch/x86/kernel/bios_uv.c | 8 ++++---- + arch/x86/kernel/genx2apic_uv_x.c | 4 ++-- + include/asm-x86/uv/bios.h | 6 +++--- + 3 files changed, 9 insertions(+), 9 deletions(-) + +--- a/arch/x86/kernel/bios_uv.c ++++ b/arch/x86/kernel/bios_uv.c +@@ -69,10 +69,10 @@ s64 uv_bios_call_reentrant(enum uv_bios_ + + long sn_partition_id; + EXPORT_SYMBOL_GPL(sn_partition_id); +-long uv_coherency_id; +-EXPORT_SYMBOL_GPL(uv_coherency_id); +-long uv_region_size; +-EXPORT_SYMBOL_GPL(uv_region_size); ++long sn_coherency_id; ++EXPORT_SYMBOL_GPL(sn_coherency_id); ++long sn_region_size; ++EXPORT_SYMBOL_GPL(sn_region_size); + int uv_type; + + +--- a/arch/x86/kernel/genx2apic_uv_x.c ++++ b/arch/x86/kernel/genx2apic_uv_x.c +@@ -524,7 +524,7 @@ void __init uv_system_init(void) + + uv_bios_init(); + uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, +- &uv_coherency_id, &uv_region_size); ++ &sn_coherency_id, &sn_region_size); + uv_rtc_init(); + + for_each_present_cpu(cpu) { +@@ -546,7 +546,7 @@ void __init uv_system_init(void) + uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; + uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; + uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; +- uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id; ++ uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; + uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; + uv_node_to_blade[nid] = blade; + uv_cpu_to_blade[cpu] = blade; +--- a/include/asm-x86/uv/bios.h ++++ b/include/asm-x86/uv/bios.h 
+@@ -85,9 +85,9 @@ extern void uv_bios_init(void); + + extern int uv_type; + extern long sn_partition_id; +-extern long uv_coherency_id; +-extern long uv_region_size; +-#define partition_coherence_id() (uv_coherency_id) ++extern long sn_coherency_id; ++extern long sn_region_size; ++#define partition_coherence_id() (sn_coherency_id) + + extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ + diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv-sysfs.diff b/src/patches/suse-2.6.27.25/patches.fixes/uv-sysfs.diff new file mode 100644 index 0000000000..f9ac9c3e77 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/uv-sysfs.diff @@ -0,0 +1,109 @@ +From: Russ Anderson +Subject: x86: Add UV sysfs entries +References: bnc#442455 + +Create /sys/firmware/sgi_uv sysfs entries for partition_id and coherence_id. + +Signed-off-by: Russ Anderson +Acked-by: Bernhard Walle + +--- + arch/x86/kernel/Makefile | 1 + arch/x86/kernel/uv_sysfs.c | 72 +++++++++++++++++++++++++++++++++++++++++++++ + include/asm-x86/uv/bios.h | 1 + 3 files changed, 74 insertions(+) + +--- a/arch/x86/kernel/Makefile ++++ b/arch/x86/kernel/Makefile +@@ -103,6 +103,7 @@ obj-$(CONFIG_OLPC) += olpc.o + # 64 bit specific files + ifeq ($(CONFIG_X86_64),y) + obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o ++ obj-y += uv_sysfs.o + obj-y += genx2apic_cluster.o + obj-y += genx2apic_phys.o + obj-y += bios_uv.o +--- /dev/null ++++ b/arch/x86/kernel/uv_sysfs.c +@@ -0,0 +1,72 @@ ++/* ++ * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. ++ * Copyright (c) Russ Anderson ++ */ ++ ++#include ++#include ++ ++struct kobject *sgi_uv_kobj; ++ ++static ssize_t partition_id_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id); ++} ++ ++static ssize_t coherence_id_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id()); ++} ++ ++static struct kobj_attribute partition_id_attr = ++ __ATTR(partition_id, S_IRUGO, partition_id_show, NULL); ++ ++static struct kobj_attribute coherence_id_attr = ++ __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL); ++ ++ ++static int __init sgi_uv_sysfs_init(void) ++{ ++ unsigned long ret; ++ ++ if (!sgi_uv_kobj) ++ sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); ++ if (!sgi_uv_kobj) { ++ printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n"); ++ return -EINVAL; ++ } ++ ++ ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); ++ if (ret) { ++ printk(KERN_WARNING "sysfs_create_file partition_id failed \n"); ++ return ret; ++ } ++ ++ ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); ++ if (ret) { ++ printk(KERN_WARNING "sysfs_create_file coherence_id failed \n"); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++device_initcall(sgi_uv_sysfs_init); +--- a/include/asm-x86/uv/bios.h ++++ b/include/asm-x86/uv/bios.h +@@ -89,5 +89,6 @@ 
extern long uv_coherency_id; + extern long uv_region_size; + #define partition_coherence_id() (uv_coherency_id) + ++extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ + + #endif /* _ASM_X86_BIOS_H */ diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv-xp-change_memprotect.diff b/src/patches/suse-2.6.27.25/patches.fixes/uv-xp-change_memprotect.diff new file mode 100644 index 0000000000..e3433463ed --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/uv-xp-change_memprotect.diff @@ -0,0 +1,228 @@ +From: Dean Nelson +Subject: Define xp_expand_memprotect() and xp_restrict_memprotect() +References: bnc#442461 + +Define xp_expand_memprotect() and xp_restrict_memprotect() so they can be +tailered to the hardware they are run on. + +Signed-off-by: Dean Nelson +Acked-by: Bernhard Walle + +--- + + drivers/misc/sgi-xp/xp.h | 7 +++- + drivers/misc/sgi-xp/xp_main.c | 7 ++++ + drivers/misc/sgi-xp/xp_sn2.c | 34 +++++++++++++++++++++ + drivers/misc/sgi-xp/xp_uv.c | 66 ++++++++++++++++++++++++++++++++++++++++++ + drivers/misc/sgi-xp/xpc_sn2.c | 15 +++------ + 5 files changed, 117 insertions(+), 12 deletions(-) + +--- a/drivers/misc/sgi-xp/xp.h ++++ b/drivers/misc/sgi-xp/xp.h +@@ -190,9 +190,10 @@ enum xp_retval { + xpGruSendMqError, /* 59: gru send message queue related error */ + + xpBadChannelNumber, /* 60: invalid channel number */ +- xpBadMsgType, /* 60: invalid message type */ ++ xpBadMsgType, /* 61: invalid message type */ ++ xpBiosError, /* 62: BIOS error */ + +- xpUnknownReason /* 61: unknown reason - must be last in enum */ ++ xpUnknownReason /* 63: unknown reason - must be last in enum */ + }; + + /* +@@ -341,6 +342,8 @@ extern unsigned long (*xp_pa) (void *); + extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long, + size_t); + extern int (*xp_cpu_to_nasid) (int); ++extern enum xp_retval (*xp_expand_memprotect) (unsigned long, unsigned long); ++extern enum xp_retval (*xp_restrict_memprotect) (unsigned long, unsigned long); 
+ + extern u64 xp_nofault_PIOR_target; + extern int xp_nofault_PIOR(void *); +--- a/drivers/misc/sgi-xp/xp_main.c ++++ b/drivers/misc/sgi-xp/xp_main.c +@@ -51,6 +51,13 @@ EXPORT_SYMBOL_GPL(xp_remote_memcpy); + int (*xp_cpu_to_nasid) (int cpuid); + EXPORT_SYMBOL_GPL(xp_cpu_to_nasid); + ++enum xp_retval (*xp_expand_memprotect) (unsigned long phys_addr, ++ unsigned long size); ++EXPORT_SYMBOL_GPL(xp_expand_memprotect); ++enum xp_retval (*xp_restrict_memprotect) (unsigned long phys_addr, ++ unsigned long size); ++EXPORT_SYMBOL_GPL(xp_restrict_memprotect); ++ + /* + * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level + * users of XPC. +--- a/drivers/misc/sgi-xp/xp_sn2.c ++++ b/drivers/misc/sgi-xp/xp_sn2.c +@@ -120,6 +120,38 @@ xp_cpu_to_nasid_sn2(int cpuid) + return cpuid_to_nasid(cpuid); + } + ++static enum xp_retval ++xp_expand_memprotect_sn2(unsigned long phys_addr, unsigned long size) ++{ ++ u64 nasid_array = 0; ++ int ret; ++ ++ ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1, ++ &nasid_array); ++ if (ret != 0) { ++ dev_err(xp, "sn_change_memprotect(,, " ++ "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret); ++ return xpSalError; ++ } ++ return xpSuccess; ++} ++ ++static enum xp_retval ++xp_restrict_memprotect_sn2(unsigned long phys_addr, unsigned long size) ++{ ++ u64 nasid_array = 0; ++ int ret; ++ ++ ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0, ++ &nasid_array); ++ if (ret != 0) { ++ dev_err(xp, "sn_change_memprotect(,, " ++ "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret); ++ return xpSalError; ++ } ++ return xpSuccess; ++} ++ + enum xp_retval + xp_init_sn2(void) + { +@@ -132,6 +164,8 @@ xp_init_sn2(void) + xp_pa = xp_pa_sn2; + xp_remote_memcpy = xp_remote_memcpy_sn2; + xp_cpu_to_nasid = xp_cpu_to_nasid_sn2; ++ xp_expand_memprotect = xp_expand_memprotect_sn2; ++ xp_restrict_memprotect = xp_restrict_memprotect_sn2; + + return xp_register_nofault_code_sn2(); + } +--- 
a/drivers/misc/sgi-xp/xp_uv.c ++++ b/drivers/misc/sgi-xp/xp_uv.c +@@ -15,6 +15,11 @@ + + #include + #include ++#if defined CONFIG_X86_64 ++#include ++#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV ++#include ++#endif + #include "../sgi-gru/grukservices.h" + #include "xp.h" + +@@ -49,6 +54,65 @@ xp_cpu_to_nasid_uv(int cpuid) + return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid)); + } + ++static enum xp_retval ++xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size) ++{ ++ int ret; ++ ++#if defined CONFIG_X86_64 ++ ret = uv_bios_change_memprotect(phys_addr, size, UV_MEMPROT_ALLOW_RW); ++ if (ret != BIOS_STATUS_SUCCESS) { ++ dev_err(xp, "uv_bios_change_memprotect(,, " ++ "UV_MEMPROT_ALLOW_RW) failed, ret=%d\n", ret); ++ return xpBiosError; ++ } ++ ++#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV ++ u64 nasid_array; ++ ++ ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1, ++ &nasid_array); ++ if (ret != 0) { ++ dev_err(xp, "sn_change_memprotect(,, " ++ "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret); ++ return xpSalError; ++ } ++#else ++ #error not a supported configuration ++#endif ++ return xpSuccess; ++} ++ ++static enum xp_retval ++xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size) ++{ ++ int ret; ++ ++#if defined CONFIG_X86_64 ++ ret = uv_bios_change_memprotect(phys_addr, size, ++ UV_MEMPROT_RESTRICT_ACCESS); ++ if (ret != BIOS_STATUS_SUCCESS) { ++ dev_err(xp, "uv_bios_change_memprotect(,, " ++ "UV_MEMPROT_RESTRICT_ACCESS) failed, ret=%d\n", ret); ++ return xpBiosError; ++ } ++ ++#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV ++ u64 nasid_array; ++ ++ ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0, ++ &nasid_array); ++ if (ret != 0) { ++ dev_err(xp, "sn_change_memprotect(,, " ++ "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret); ++ return xpSalError; ++ } ++#else ++ #error not a supported configuration ++#endif ++ return xpSuccess; ++} 
++ + enum xp_retval + xp_init_uv(void) + { +@@ -61,6 +125,8 @@ xp_init_uv(void) + xp_pa = xp_pa_uv; + xp_remote_memcpy = xp_remote_memcpy_uv; + xp_cpu_to_nasid = xp_cpu_to_nasid_uv; ++ xp_expand_memprotect = xp_expand_memprotect_uv; ++ xp_restrict_memprotect = xp_restrict_memprotect_uv; + + return xpSuccess; + } +--- a/drivers/misc/sgi-xp/xpc_sn2.c ++++ b/drivers/misc/sgi-xp/xpc_sn2.c +@@ -553,22 +553,17 @@ static u64 xpc_prot_vec_sn2[MAX_NUMNODES + static enum xp_retval + xpc_allow_amo_ops_sn2(struct amo *amos_page) + { +- u64 nasid_array = 0; +- int ret; ++ enum xp_retval ret = xpSuccess; + + /* + * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST + * collides with memory operations. On those systems we call + * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead. + */ +- if (!enable_shub_wars_1_1()) { +- ret = sn_change_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE, +- SN_MEMPROT_ACCESS_CLASS_1, +- &nasid_array); +- if (ret != 0) +- return xpSalError; +- } +- return xpSuccess; ++ if (!enable_shub_wars_1_1()) ++ ret = xp_expand_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE); ++ ++ return ret; + } + + /* diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv-xpc-get_sn_info.diff b/src/patches/suse-2.6.27.25/patches.fixes/uv-xpc-get_sn_info.diff new file mode 100644 index 0000000000..795228d431 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/uv-xpc-get_sn_info.diff @@ -0,0 +1,28 @@ +From: Dean Nelson +Subject: [PATCH] Define xp_partition_id and xp_region_size +References: bnc#442461 + + +Define xp_partition_id and xp_region_size to their correct values. + +Signed-off-by: Dean Nelson +Acked-by: Bernhard Walle + +--- + + drivers/misc/sgi-xp/xp_uv.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/misc/sgi-xp/xp_uv.c ++++ b/drivers/misc/sgi-xp/xp_uv.c +@@ -119,8 +119,8 @@ xp_init_uv(void) + BUG_ON(!is_uv()); + + xp_max_npartitions = XP_MAX_NPARTITIONS_UV; +- xp_partition_id = 0; /* !!! 
not correct value */ +- xp_region_size = 0; /* !!! not correct value */ ++ xp_partition_id = sn_partition_id; ++ xp_region_size = sn_region_size; + + xp_pa = xp_pa_uv; + xp_remote_memcpy = xp_remote_memcpy_uv; diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv-xpc_create_gru_mq_uv.diff b/src/patches/suse-2.6.27.25/patches.fixes/uv-xpc_create_gru_mq_uv.diff new file mode 100644 index 0000000000..e1a82e6c56 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/uv-xpc_create_gru_mq_uv.diff @@ -0,0 +1,388 @@ +From: Dean Nelson +Subject: [PATCH] Add the code to create the activate and notify gru message queues +References: bnc#442461 + +For UV add the code to create the activate and notify gru message queues. + +Signed-off-by: Dean Nelson +Acked-by: Bernhard Walle + +--- + + drivers/misc/sgi-xp/xpc.h | 12 + + drivers/misc/sgi-xp/xpc_uv.c | 259 ++++++++++++++++++++++++++++++++++--------- + 2 files changed, 218 insertions(+), 53 deletions(-) + +--- a/drivers/misc/sgi-xp/xpc.h ++++ b/drivers/misc/sgi-xp/xpc.h +@@ -181,6 +181,18 @@ struct xpc_vars_part_sn2 { + xpc_nasid_mask_nlongs)) + + /* ++ * Info pertinent to a GRU message queue using a watch list for irq generation. ++ */ ++struct xpc_gru_mq_uv { ++ void *address; /* address of GRU message queue */ ++ unsigned int order; /* size of GRU message queue as a power of 2 */ ++ int irq; /* irq raised when message is received in mq */ ++ int mmr_blade; /* blade where watchlist was allocated from */ ++ unsigned long mmr_offset; /* offset of irq mmr located on mmr_blade */ ++ int watchlist_num; /* number of watchlist allocatd by BIOS */ ++}; ++ ++/* + * The activate_mq is used to send/receive GRU messages that affect XPC's + * heartbeat, partition active state, and channel state. This is UV only. 
+ */ +--- a/drivers/misc/sgi-xp/xpc_uv.c ++++ b/drivers/misc/sgi-xp/xpc_uv.c +@@ -18,7 +18,15 @@ + #include + #include + #include ++#include + #include ++#if defined CONFIG_X86_64 ++#include ++#include ++#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV ++#include ++#include ++#endif + #include "../sgi-gru/gru.h" + #include "../sgi-gru/grukservices.h" + #include "xpc.h" +@@ -27,15 +35,17 @@ static atomic64_t xpc_heartbeat_uv; + static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); + + #define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) +-#define XPC_NOTIFY_MSG_SIZE_UV (2 * GRU_CACHE_LINE_BYTES) ++#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ ++ XPC_ACTIVATE_MSG_SIZE_UV) ++#define XPC_ACTIVATE_IRQ_NAME "xpc_activate" + +-#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ +- XPC_ACTIVATE_MSG_SIZE_UV) +-#define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ +- XPC_NOTIFY_MSG_SIZE_UV) ++#define XPC_NOTIFY_MSG_SIZE_UV (2 * GRU_CACHE_LINE_BYTES) ++#define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ ++ XPC_NOTIFY_MSG_SIZE_UV) ++#define XPC_NOTIFY_IRQ_NAME "xpc_notify" + +-static void *xpc_activate_mq_uv; +-static void *xpc_notify_mq_uv; ++static struct xpc_gru_mq_uv *xpc_activate_mq_uv; ++static struct xpc_gru_mq_uv *xpc_notify_mq_uv; + + static int + xpc_setup_partitions_sn_uv(void) +@@ -52,62 +62,209 @@ xpc_setup_partitions_sn_uv(void) + return 0; + } + +-static void * +-xpc_create_gru_mq_uv(unsigned int mq_size, int cpuid, unsigned int irq, ++static int ++xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) ++{ ++#if defined CONFIG_X86_64 ++ mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset); ++ if (mq->irq < 0) { ++ dev_err(xpc_part, "uv_setup_irq() returned error=%d\n", ++ mq->irq); ++ } ++ ++#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV ++ int mmr_pnode; ++ unsigned long mmr_value; ++ ++ if (strcmp(irq_name, 
XPC_ACTIVATE_IRQ_NAME) == 0) ++ mq->irq = SGI_XPC_ACTIVATE; ++ else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0) ++ mq->irq = SGI_XPC_NOTIFY; ++ else ++ return -EINVAL; ++ ++ mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); ++ mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq; ++ ++ uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value); ++#else ++ #error not a supported configuration ++#endif ++ ++ return 0; ++} ++ ++static void ++xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq) ++{ ++#if defined CONFIG_X86_64 ++ uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset); ++ ++#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV ++ int mmr_pnode; ++ unsigned long mmr_value; ++ ++ mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); ++ mmr_value = 1UL << 16; ++ ++ uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value); ++#else ++ #error not a supported configuration ++#endif ++} ++ ++static int ++xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq) ++{ ++ int ret; ++ ++#if defined CONFIG_X86_64 ++ ret = uv_bios_mq_watchlist_alloc(mq->mmr_blade, mq->address, mq->order, ++ &mq->mmr_offset); ++ if (ret < 0) { ++ dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, " ++ "ret=%d\n", ret); ++ return ret; ++ } ++#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV ++ ret = sn_mq_watchlist_alloc(mq->mmr_blade, mq->address, mq->order, ++ &mq->mmr_offset); ++ if (ret < 0) { ++ dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n", ++ ret); ++ return -EBUSY; ++ } ++#else ++ #error not a supported configuration ++#endif ++ ++ mq->watchlist_num = ret; ++ return 0; ++} ++ ++static void ++xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq) ++{ ++ int ret; ++ ++#if defined CONFIG_X86_64 ++ ret = uv_bios_mq_watchlist_free(mq->mmr_blade, mq->watchlist_num); ++ BUG_ON(ret != BIOS_STATUS_SUCCESS); ++#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV ++ ret = sn_mq_watchlist_free(mq->mmr_blade, mq->watchlist_num); ++ 
BUG_ON(ret != SALRET_OK); ++#else ++ #error not a supported configuration ++#endif ++} ++ ++static struct xpc_gru_mq_uv * ++xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, + irq_handler_t irq_handler) + { ++ enum xp_retval xp_ret; + int ret; + int nid; +- int mq_order; ++ int pg_order; + struct page *page; +- void *mq; ++ struct xpc_gru_mq_uv *mq; ++ ++ mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL); ++ if (mq == NULL) { ++ dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() " ++ "a xpc_gru_mq_uv structure\n"); ++ ret = -ENOMEM; ++ goto out_1; ++ } ++ ++ pg_order = get_order(mq_size); ++ mq->order = pg_order + PAGE_SHIFT; ++ mq_size = 1UL << mq->order; + +- nid = cpu_to_node(cpuid); +- mq_order = get_order(mq_size); ++ mq->mmr_blade = uv_cpu_to_blade_id(cpu); ++ ++ nid = cpu_to_node(cpu); + page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, +- mq_order); ++ pg_order); + if (page == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " + "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); +- return NULL; ++ ret = -ENOMEM; ++ goto out_2; + } ++ mq->address = page_address(page); + +- mq = page_address(page); +- ret = gru_create_message_queue(mq, mq_size); ++ ret = gru_create_message_queue(mq->address, mq_size); + if (ret != 0) { + dev_err(xpc_part, "gru_create_message_queue() returned " + "error=%d\n", ret); +- free_pages((unsigned long)mq, mq_order); +- return NULL; ++ ret = -EINVAL; ++ goto out_3; + } + +- /* !!! 
Need to do some other things to set up IRQ */ ++ /* enable generation of irq when GRU mq operation occurs to this mq */ ++ ret = xpc_gru_mq_watchlist_alloc_uv(mq); ++ if (ret != 0) ++ goto out_3; ++ ++ ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name); ++ if (ret != 0) ++ goto out_4; + +- ret = request_irq(irq, irq_handler, 0, "xpc", NULL); ++ ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL); + if (ret != 0) { + dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n", +- irq, ret); +- free_pages((unsigned long)mq, mq_order); +- return NULL; ++ mq->irq, ret); ++ goto out_5; + } + +- /* !!! enable generation of irq when GRU mq op occurs to this mq */ +- +- /* ??? allow other partitions to access GRU mq? */ ++ /* allow other partitions to access this GRU mq */ ++ xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size); ++ if (xp_ret != xpSuccess) { ++ ret = -EACCES; ++ goto out_6; ++ } + + return mq; ++ ++ /* something went wrong */ ++out_6: ++ free_irq(mq->irq, NULL); ++out_5: ++ xpc_release_gru_mq_irq_uv(mq); ++out_4: ++ xpc_gru_mq_watchlist_free_uv(mq); ++out_3: ++ free_pages((unsigned long)mq->address, pg_order); ++out_2: ++ kfree(mq); ++out_1: ++ return ERR_PTR(ret); + } + + static void +-xpc_destroy_gru_mq_uv(void *mq, unsigned int mq_size, unsigned int irq) ++xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq) + { +- /* ??? disallow other partitions to access GRU mq? */ ++ unsigned int mq_size; ++ int pg_order; ++ int ret; ++ ++ /* disallow other partitions to access GRU mq */ ++ mq_size = 1UL << mq->order; ++ ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size); ++ BUG_ON(ret != xpSuccess); ++ ++ /* unregister irq handler and release mq irq/vector mapping */ ++ free_irq(mq->irq, NULL); ++ xpc_release_gru_mq_irq_uv(mq); + +- /* !!! 
disable generation of irq when GRU mq op occurs to this mq */ ++ /* disable generation of irq when GRU mq op occurs to this mq */ ++ xpc_gru_mq_watchlist_free_uv(mq); + +- free_irq(irq, NULL); ++ pg_order = mq->order - PAGE_SHIFT; ++ free_pages((unsigned long)mq->address, pg_order); + +- free_pages((unsigned long)mq, get_order(mq_size)); ++ kfree(mq); + } + + static enum xp_retval +@@ -402,7 +559,10 @@ xpc_handle_activate_IRQ_uv(int irq, void + struct xpc_partition *part; + int wakeup_hb_checker = 0; + +- while ((msg_hdr = gru_get_next_message(xpc_activate_mq_uv)) != NULL) { ++ while (1) { ++ msg_hdr = gru_get_next_message(xpc_activate_mq_uv->address); ++ if (msg_hdr == NULL) ++ break; + + partid = msg_hdr->partid; + if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) { +@@ -418,7 +578,7 @@ xpc_handle_activate_IRQ_uv(int irq, void + } + } + +- gru_free_message(xpc_activate_mq_uv, msg_hdr); ++ gru_free_message(xpc_activate_mq_uv->address, msg_hdr); + } + + if (wakeup_hb_checker) +@@ -507,7 +667,7 @@ xpc_get_partition_rsvd_page_pa_uv(void * + static int + xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp) + { +- rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq_uv); ++ rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq_uv->address); + return 0; + } + +@@ -1410,22 +1570,18 @@ xpc_init_uv(void) + return -E2BIG; + } + +- /* ??? The cpuid argument's value is 0, is that what we want? */ +- /* !!! The irq argument's value isn't correct. */ +- xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, 0, ++ xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, ++ XPC_ACTIVATE_IRQ_NAME, + xpc_handle_activate_IRQ_uv); +- if (xpc_activate_mq_uv == NULL) +- return -ENOMEM; ++ if (IS_ERR(xpc_activate_mq_uv)) ++ return PTR_ERR(xpc_activate_mq_uv); + +- /* ??? The cpuid argument's value is 0, is that what we want? */ +- /* !!! The irq argument's value isn't correct. 
*/ +- xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, 0, ++ xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, ++ XPC_NOTIFY_IRQ_NAME, + xpc_handle_notify_IRQ_uv); +- if (xpc_notify_mq_uv == NULL) { +- /* !!! The irq argument's value isn't correct. */ +- xpc_destroy_gru_mq_uv(xpc_activate_mq_uv, +- XPC_ACTIVATE_MQ_SIZE_UV, 0); +- return -ENOMEM; ++ if (IS_ERR(xpc_notify_mq_uv)) { ++ xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); ++ return PTR_ERR(xpc_notify_mq_uv); + } + + return 0; +@@ -1434,9 +1590,6 @@ xpc_init_uv(void) + void + xpc_exit_uv(void) + { +- /* !!! The irq argument's value isn't correct. */ +- xpc_destroy_gru_mq_uv(xpc_notify_mq_uv, XPC_NOTIFY_MQ_SIZE_UV, 0); +- +- /* !!! The irq argument's value isn't correct. */ +- xpc_destroy_gru_mq_uv(xpc_activate_mq_uv, XPC_ACTIVATE_MQ_SIZE_UV, 0); ++ xpc_destroy_gru_mq_uv(xpc_notify_mq_uv); ++ xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); + } diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv-xpc_get_part_rsvd_page.diff b/src/patches/suse-2.6.27.25/patches.fixes/uv-xpc_get_part_rsvd_page.diff new file mode 100644 index 0000000000..ca9d9f2b89 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.fixes/uv-xpc_get_part_rsvd_page.diff @@ -0,0 +1,63 @@ +From: Dean Nelson +Subject: [PATCH] Add support for getting the address of a partition's reserved page. +References: bnc#442461 + +Add support for getting the address of a partition's reserved page. 
+ +Signed-off-by: Dean Nelson +Acked-by: Bernhard Walle + +--- + + drivers/misc/sgi-xp/xpc_uv.c | 31 ++++++++++++++++++++++++++++--- + 1 file changed, 28 insertions(+), 3 deletions(-) + +Index: linux/drivers/misc/sgi-xp/xpc_uv.c +=================================================================== +--- linux.orig/drivers/misc/sgi-xp/xpc_uv.c 2008-10-21 12:50:18.000000000 -0500 ++++ linux/drivers/misc/sgi-xp/xpc_uv.c 2008-10-21 14:00:13.000000000 -0500 +@@ -642,7 +642,7 @@ xpc_send_local_activate_IRQ_uv(struct xp + struct xpc_partition_uv *part_uv = &part->sn.uv; + + /* +- * !!! Make our side think that the remote parition sent an activate ++ * !!! Make our side think that the remote partition sent an activate + * !!! message our way by doing what the activate IRQ handler would + * !!! do had one really been sent. + */ +@@ -660,8 +660,33 @@ static enum xp_retval + xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, + size_t *len) + { +- /* !!! call the UV version of sn_partition_reserved_page_pa() */ +- return xpUnsupported; ++ s64 status; ++ enum xp_retval ret; ++ ++#if defined CONFIG_X86_64 ++ status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa, ++ (u64 *)len); ++ if (status == BIOS_STATUS_SUCCESS) ++ ret = xpSuccess; ++ else if (status == BIOS_STATUS_MORE_PASSES) ++ ret = xpNeedMoreInfo; ++ else ++ ret = xpBiosError; ++ ++#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV ++ status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len); ++ if (status == SALRET_OK) ++ ret = xpSuccess; ++ else if (status == SALRET_MORE_PASSES) ++ ret = xpNeedMoreInfo; ++ else ++ ret = xpSalError; ++ ++#else ++ #error not a supported configuration ++#endif ++ ++ return ret; + } + + static int diff --git a/src/patches/suse-2.6.27.25/patches.fixes/uv_setup_irq.diff b/src/patches/suse-2.6.27.25/patches.fixes/uv_setup_irq.diff new file mode 100644 index 0000000000..257cd327f3 --- /dev/null +++ 
b/src/patches/suse-2.6.27.25/patches.fixes/uv_setup_irq.diff @@ -0,0 +1,239 @@ +From: Dean Nelson +Date: Thu, 2 Oct 2008 17:18:21 +0000 (-0500) +Subject: x86, UV: add uv_setup_irq() and uv_teardown_irq() functions, v3 +X-Git-Tag: v2.6.28-rc1~80^2~27 +X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=4173a0e7371ece227559b44943c6fd456ee470d1 +References: bnc#442461 + +x86, UV: add uv_setup_irq() and uv_teardown_irq() functions, v3 + +Provide a means for UV interrupt MMRs to be setup with the message to be sent +when an MSI is raised. + +Signed-off-by: Dean Nelson +Signed-off-by: Ingo Molnar +Acked-by: Bernhard Walle + +--- + arch/x86/kernel/Makefile | 2 - + arch/x86/kernel/io_apic_64.c | 68 +++++++++++++++++++++++++++++++++++++ + arch/x86/kernel/uv_irq.c | 79 +++++++++++++++++++++++++++++++++++++++++++ + include/asm-x86/uv/uv_irq.h | 36 +++++++++++++++++++ + 4 files changed, 184 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/Makefile ++++ b/arch/x86/kernel/Makefile +@@ -106,7 +106,7 @@ ifeq ($(CONFIG_X86_64),y) + obj-y += uv_sysfs.o + obj-y += genx2apic_cluster.o + obj-y += genx2apic_phys.o +- obj-y += bios_uv.o ++ obj-y += bios_uv.o uv_irq.o + obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o + obj-$(CONFIG_AUDIT) += audit_64.o + +--- a/arch/x86/kernel/io_apic_64.c ++++ b/arch/x86/kernel/io_apic_64.c +@@ -51,6 +51,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -2787,6 +2789,72 @@ int arch_setup_ht_irq(unsigned int irq, + } + #endif /* CONFIG_HT_IRQ */ + ++#ifdef CONFIG_X86_64 ++/* ++ * Re-target the irq to the specified CPU and enable the specified MMR located ++ * on the specified blade to allow the sending of MSIs to the specified CPU. 
++ */ ++int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, ++ unsigned long mmr_offset) ++{ ++ const cpumask_t *eligible_cpu = get_cpu_mask(cpu); ++ struct irq_cfg *cfg; ++ int mmr_pnode; ++ unsigned long mmr_value; ++ struct uv_IO_APIC_route_entry *entry; ++ unsigned long flags; ++ int err; ++ ++ err = assign_irq_vector(irq, eligible_cpu); ++ if (err != 0) ++ return err; ++ ++ spin_lock_irqsave(&vector_lock, flags); ++ set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, ++ irq_name); ++ spin_unlock_irqrestore(&vector_lock, flags); ++ ++ cfg = &irq_cfg[irq]; ++ ++ mmr_value = 0; ++ entry = (struct uv_IO_APIC_route_entry *)&mmr_value; ++ BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); ++ ++ entry->vector = cfg->vector; ++ entry->delivery_mode = INT_DELIVERY_MODE; ++ entry->dest_mode = INT_DEST_MODE; ++ entry->polarity = 0; ++ entry->trigger = 0; ++ entry->mask = 0; ++ entry->dest = cpu_mask_to_apicid(eligible_cpu); ++ ++ mmr_pnode = uv_blade_to_pnode(mmr_blade); ++ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); ++ ++ return irq; ++} ++ ++/* ++ * Disable the specified MMR located on the specified blade so that MSIs are ++ * longer allowed to be sent. 
++ */ ++void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) ++{ ++ unsigned long mmr_value; ++ struct uv_IO_APIC_route_entry *entry; ++ int mmr_pnode; ++ ++ mmr_value = 0; ++ entry = (struct uv_IO_APIC_route_entry *)&mmr_value; ++ BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); ++ ++ entry->mask = 1; ++ ++ mmr_pnode = uv_blade_to_pnode(mmr_blade); ++ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); ++} ++#endif /* CONFIG_X86_64 */ ++ + /* -------------------------------------------------------------------------- + ACPI-based IOAPIC Configuration + -------------------------------------------------------------------------- */ +--- /dev/null ++++ b/arch/x86/kernel/uv_irq.c +@@ -0,0 +1,79 @@ ++/* ++ * This file is subject to the terms and conditions of the GNU General Public ++ * License. See the file "COPYING" in the main directory of this archive ++ * for more details. ++ * ++ * SGI UV IRQ functions ++ * ++ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. ++ */ ++ ++#include ++#include ++ ++#include ++#include ++ ++static void uv_noop(unsigned int irq) ++{ ++} ++ ++static unsigned int uv_noop_ret(unsigned int irq) ++{ ++ return 0; ++} ++ ++static void uv_ack_apic(unsigned int irq) ++{ ++ ack_APIC_irq(); ++} ++ ++struct irq_chip uv_irq_chip = { ++ .name = "UV-CORE", ++ .startup = uv_noop_ret, ++ .shutdown = uv_noop, ++ .enable = uv_noop, ++ .disable = uv_noop, ++ .ack = uv_noop, ++ .mask = uv_noop, ++ .unmask = uv_noop, ++ .eoi = uv_ack_apic, ++ .end = uv_noop, ++}; ++ ++/* ++ * Set up a mapping of an available irq and vector, and enable the specified ++ * MMR that defines the MSI that is to be sent to the specified CPU when an ++ * interrupt is raised. 
++ */ ++int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, ++ unsigned long mmr_offset) ++{ ++ int irq; ++ int ret; ++ ++ irq = create_irq(); ++ if (irq < 0) ++ return -EBUSY; ++ ++ ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); ++ if (ret != irq) ++ destroy_irq(irq); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(uv_setup_irq); ++ ++/* ++ * Tear down a mapping of an irq and vector, and disable the specified MMR that ++ * defined the MSI that was to be sent to the specified CPU when an interrupt ++ * was raised. ++ * ++ * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). ++ */ ++void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) ++{ ++ arch_disable_uv_irq(mmr_blade, mmr_offset); ++ destroy_irq(irq); ++} ++EXPORT_SYMBOL_GPL(uv_teardown_irq); +--- /dev/null ++++ b/include/asm-x86/uv/uv_irq.h +@@ -0,0 +1,36 @@ ++/* ++ * This file is subject to the terms and conditions of the GNU General Public ++ * License. See the file "COPYING" in the main directory of this archive ++ * for more details. ++ * ++ * SGI UV IRQ definitions ++ * ++ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. ++ */ ++ ++#ifndef _ASM_X86_UV_UV_IRQ_H ++#define _ASM_X86_UV_UV_IRQ_H ++ ++/* If a generic version of this structure gets defined, eliminate this one. 
*/ ++struct uv_IO_APIC_route_entry { ++ __u64 vector : 8, ++ delivery_mode : 3, ++ dest_mode : 1, ++ delivery_status : 1, ++ polarity : 1, ++ __reserved_1 : 1, ++ trigger : 1, ++ mask : 1, ++ __reserved_2 : 15, ++ dest : 32; ++}; ++ ++extern struct irq_chip uv_irq_chip; ++ ++extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long); ++extern void arch_disable_uv_irq(int, unsigned long); ++ ++extern int uv_setup_irq(char *, int, int, unsigned long); ++extern void uv_teardown_irq(unsigned int, int, unsigned long); ++ ++#endif /* _ASM_X86_UV_UV_IRQ_H */ diff --git a/src/patches/suse-2.6.27.25/patches.kernel.org/ipmi-section-conflict.diff b/src/patches/suse-2.6.27.25/patches.kernel.org/ipmi-section-conflict.diff new file mode 100644 index 0000000000..e1a9bc419c --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.kernel.org/ipmi-section-conflict.diff @@ -0,0 +1,82 @@ +From: Jeff Mahoney +Subject: [PATCH] ipmi: Fix section type conflicts + + Module parameters can't be static since the module macros explicitly + put those symbols in the __param section. It causes a section conflict + on ia64. This doesn't occur with standard types, since they are global + and exported. + +Signed-off-by: Jeff Mahoney +--- + + drivers/char/ipmi/ipmi_si_intf.c | 4 ++-- + drivers/char/ipmi/ipmi_watchdog.c | 10 +++++----- + 2 files changed, 7 insertions(+), 7 deletions(-) + +--- a/drivers/char/ipmi/ipmi_si_intf.c ++++ b/drivers/char/ipmi/ipmi_si_intf.c +@@ -1152,7 +1152,7 @@ static unsigned int num_slave_addrs; + #define IPMI_MEM_ADDR_SPACE 1 + static char *addr_space_to_str[] = { "i/o", "mem" }; + +-static int hotmod_handler(const char *val, struct kernel_param *kp); ++int hotmod_handler(const char *val, struct kernel_param *kp); + + module_param_call(hotmod, hotmod_handler, NULL, NULL, 0200); + MODULE_PARM_DESC(hotmod, "Add and remove interfaces. 
See" +@@ -1574,7 +1574,7 @@ static int check_hotmod_int_op(const cha + return 0; + } + +-static int hotmod_handler(const char *val, struct kernel_param *kp) ++int hotmod_handler(const char *val, struct kernel_param *kp) + { + char *str = kstrdup(val, GFP_KERNEL); + int rv; +--- a/drivers/char/ipmi/ipmi_watchdog.c ++++ b/drivers/char/ipmi/ipmi_watchdog.c +@@ -196,7 +196,7 @@ static void ipmi_unregister_watchdog(int + */ + static int start_now; + +-static int set_param_int(const char *val, struct kernel_param *kp) ++int set_param_int(const char *val, struct kernel_param *kp) + { + char *endp; + int l; +@@ -215,7 +215,7 @@ static int set_param_int(const char *val + return rv; + } + +-static int get_param_int(char *buffer, struct kernel_param *kp) ++int get_param_int(char *buffer, struct kernel_param *kp) + { + return sprintf(buffer, "%i", *((int *)kp->arg)); + } +@@ -227,7 +227,7 @@ static int preaction_op(const char *inva + static int preop_op(const char *inval, char *outval); + static void check_parms(void); + +-static int set_param_str(const char *val, struct kernel_param *kp) ++int set_param_str(const char *val, struct kernel_param *kp) + { + action_fn fn = (action_fn) kp->arg; + int rv = 0; +@@ -251,7 +251,7 @@ static int set_param_str(const char *val + return rv; + } + +-static int get_param_str(char *buffer, struct kernel_param *kp) ++int get_param_str(char *buffer, struct kernel_param *kp) + { + action_fn fn = (action_fn) kp->arg; + int rv; +@@ -263,7 +263,7 @@ static int get_param_str(char *buffer, s + } + + +-static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp) ++int set_param_wdog_ifnum(const char *val, struct kernel_param *kp) + { + int rv = param_set_int(val, kp); + if (rv) diff --git a/src/patches/suse-2.6.27.25/patches.kernel.org/psmouse-section-conflict.diff b/src/patches/suse-2.6.27.25/patches.kernel.org/psmouse-section-conflict.diff new file mode 100644 index 0000000000..f62c89c8e5 --- /dev/null +++ 
b/src/patches/suse-2.6.27.25/patches.kernel.org/psmouse-section-conflict.diff @@ -0,0 +1,47 @@ +From: Jeff Mahoney +Subject: [PATCH] psmouse: fix section type conflict + + Module parameters can't be static since the module macros explicitly + put those symbols in the __param section. It causes a section conflict + on ia64. This doesn't occur with standard types, since they are global + and exported. + +Signed-off-by: Jeff Mahoney + +--- + + drivers/input/mouse/psmouse-base.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/drivers/input/mouse/psmouse-base.c ++++ b/drivers/input/mouse/psmouse-base.c +@@ -36,8 +36,8 @@ MODULE_DESCRIPTION(DRIVER_DESC); + MODULE_LICENSE("GPL"); + + static unsigned int psmouse_max_proto = PSMOUSE_AUTO; +-static int psmouse_set_maxproto(const char *val, struct kernel_param *kp); +-static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp); ++int psmouse_set_maxproto(const char *val, struct kernel_param *kp); ++int psmouse_get_maxproto(char *buffer, struct kernel_param *kp); + #define param_check_proto_abbrev(name, p) __param_check(name, p, unsigned int) + #define param_set_proto_abbrev psmouse_set_maxproto + #define param_get_proto_abbrev psmouse_get_maxproto +@@ -1573,7 +1573,8 @@ static ssize_t psmouse_attr_set_resoluti + } + + +-static int psmouse_set_maxproto(const char *val, struct kernel_param *kp) ++/* These two should be static, but it causes a section type conflict */ ++int psmouse_set_maxproto(const char *val, struct kernel_param *kp) + { + const struct psmouse_protocol *proto; + +@@ -1590,7 +1591,7 @@ static int psmouse_set_maxproto(const ch + return 0; + } + +-static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp) ++int psmouse_get_maxproto(char *buffer, struct kernel_param *kp) + { + int type = *((unsigned int *)kp->arg); + diff --git a/src/patches/suse-2.6.27.25/patches.rpmify/cloneconfig.diff b/src/patches/suse-2.6.27.25/patches.rpmify/cloneconfig.diff new file mode 
100644 index 0000000000..73dc910a0d --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.rpmify/cloneconfig.diff @@ -0,0 +1,37 @@ +From: Andreas Gruenbacher +Subject: Add ``cloneconfig'' target + +Cloneconfig takes the first configuration it finds which appears +to belong to the running kernel, and configures the kernel sources +to match this configuration as closely as possible. + +Signed-off-by: Andreas Gruenbacher + + scripts/kconfig/Makefile | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +--- a/scripts/kconfig/Makefile ++++ b/scripts/kconfig/Makefile +@@ -61,6 +61,22 @@ allnoconfig: $(obj)/conf + allmodconfig: $(obj)/conf + $< -m $(Kconfig) + ++UNAME_RELEASE := $(shell uname -r) ++CLONECONFIG := $(firstword $(wildcard /proc/config.gz \ ++ /lib/modules/$(UNAME_RELEASE)/.config \ ++ /etc/kernel-config \ ++ /boot/config-$(UNAME_RELEASE))) ++cloneconfig: $(obj)/conf ++ $(Q)case "$(CLONECONFIG)" in \ ++ '') echo -e "The configuration of the running" \ ++ "kernel could not be determined\n"; \ ++ false ;; \ ++ *.gz) gzip -cd $(CLONECONFIG) > .config.running ;; \ ++ *) cat $(CLONECONFIG) > .config.running ;; \ ++ esac && \ ++ echo -e "Cloning configuration file $(CLONECONFIG)\n" ++ $(Q)$< -D .config.running arch/$(SRCARCH)/Kconfig ++ + defconfig: $(obj)/conf + ifeq ($(KBUILD_DEFCONFIG),) + $< -d $(Kconfig) diff --git a/src/patches/suse-2.6.27.25/patches.suse/apm_setup_UP.diff b/src/patches/suse-2.6.27.25/patches.suse/apm_setup_UP.diff new file mode 100644 index 0000000000..b24c3366fa --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/apm_setup_UP.diff @@ -0,0 +1,57 @@ +From: Olaf Dabrunz +Subject: [apm] default to "power_off" when SMP kernel is used on single processor machines +Reference: SUSE221667 + +This patch turns on support for the APM power_off function by default when the +SMP kernel is used on single processor machines. 
+ +It is a bit ugly to use a separate variable to make sure the default value is +only used when needed and the power_off variable is not initialized twice. But +I did not find a better way to do this with the way the current initialization +system works. + +Signed-off-by: Olaf Dabrunz + + + arch/x86/kernel/apm_32.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/apm_32.c ++++ b/arch/x86/kernel/apm_32.c +@@ -389,6 +389,7 @@ static int smp __read_mostly; + static int apm_disabled = -1; + #ifdef CONFIG_SMP + static int power_off; ++static int power_off_set; + #else + static int power_off = 1; + #endif +@@ -1797,6 +1798,14 @@ static int apm(void *unused) + } + } + ++#ifdef CONFIG_SMP ++ if (!power_off_set) { ++ power_off = (num_online_cpus() == 1); ++ /* remember not to initialize (with default value) again */ ++ power_off_set = 1; ++ } ++#endif ++ + /* Install our power off handler.. */ + if (power_off) + pm_power_off = apm_power_off; +@@ -1840,8 +1849,12 @@ static int __init apm_setup(char *str) + if (strncmp(str, "debug", 5) == 0) + debug = !invert; + if ((strncmp(str, "power-off", 9) == 0) || +- (strncmp(str, "power_off", 9) == 0)) ++ (strncmp(str, "power_off", 9) == 0)) { + power_off = !invert; ++#ifdef CONFIG_SMP ++ power_off_set = 1; ++#endif ++ } + if (strncmp(str, "smp", 3) == 0) { + smp = !invert; + idle_threshold = 100; diff --git a/src/patches/suse-2.6.27.25/patches.suse/crasher-26.diff b/src/patches/suse-2.6.27.25/patches.suse/crasher-26.diff new file mode 100644 index 0000000000..2815e0551e --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/crasher-26.diff @@ -0,0 +1,260 @@ +From: Chris Mason +Subject: slab testing module + +--- + drivers/char/Kconfig | 5 + + drivers/char/Makefile | 1 + drivers/char/crasher.c | 225 +++++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 231 insertions(+) + +--- a/drivers/char/Kconfig ++++ b/drivers/char/Kconfig +@@ -1104,5 +1104,10 @@ config DEVPORT + 
+ source "drivers/s390/char/Kconfig" + ++config CRASHER ++ tristate "Crasher Module" ++ help ++ Slab cache memory tester. Only use this as a module ++ + endmenu + +--- a/drivers/char/Makefile ++++ b/drivers/char/Makefile +@@ -105,6 +105,7 @@ obj-$(CONFIG_IPMI_HANDLER) += ipmi/ + + obj-$(CONFIG_HANGCHECK_TIMER) += hangcheck-timer.o + obj-$(CONFIG_TCG_TPM) += tpm/ ++obj-$(CONFIG_CRASHER) += crasher.o + + obj-$(CONFIG_PS3_FLASH) += ps3flash.o + +--- /dev/null ++++ b/drivers/char/crasher.c +@@ -0,0 +1,225 @@ ++/* ++ * crasher.c, it breaks things ++ */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int module_exiting; ++static struct completion startup = COMPLETION_INITIALIZER(startup); ++static unsigned long rand_seed = 152L; ++static unsigned long seed = 152L; ++static int threads = 1; ++static int call_panic; ++static int call_bug; ++static int trap_null, call_null, jump_null; ++static long trap_read, trap_write, call_bad, jump_bad; ++ ++module_param(seed, ulong, 0); ++module_param(call_panic, bool, 0); ++module_param(call_bug, bool, 0); ++module_param(trap_null, bool, 0); ++module_param(trap_read, long, 0); ++module_param(trap_write, long, 0); ++module_param(call_null, bool, 0); ++module_param(call_bad, long, 0); ++module_param(jump_null, bool, 0); ++module_param(jump_bad, long, 0); ++module_param(threads, int, 0); ++MODULE_PARM_DESC(seed, "random seed for memory tests"); ++MODULE_PARM_DESC(call_panic, "test option. call panic() and render the system unusable."); ++MODULE_PARM_DESC(call_bug, "test option. call BUG() and render the system unusable."); ++MODULE_PARM_DESC(trap_null, "test option. dereference a NULL pointer to simulate a crash and render the system unusable."); ++MODULE_PARM_DESC(trap_read, "test option. read from an invalid address to simulate a crash and render the system unusable."); ++MODULE_PARM_DESC(trap_write, "test option. 
write to an invalid address to simulate a crash and render the system unusable."); ++MODULE_PARM_DESC(call_null, "test option. call a NULL pointer to simulate a crash and render the system unusable."); ++MODULE_PARM_DESC(call_bad, "test option. call an invalid address to simulate a crash and render the system unusable."); ++MODULE_PARM_DESC(jump_null, "test option. jump to a NULL pointer to simulate a crash and render the system unusable."); ++MODULE_PARM_DESC(jump_bad, "test option. jump to an invalid address to simulate a crash and render the system unusable."); ++MODULE_PARM_DESC(threads, "number of threads to run"); ++MODULE_LICENSE("GPL"); ++ ++#define NUM_ALLOC 24 ++#define NUM_SIZES 8 ++static int sizes[] = { 32, 64, 128, 192, 256, 1024, 2048, 4096 }; ++ ++struct mem_buf { ++ char *buf; ++ int size; ++}; ++ ++static unsigned long crasher_random(void) ++{ ++ rand_seed = rand_seed*69069L+1; ++ return rand_seed^jiffies; ++} ++ ++void crasher_srandom(unsigned long entropy) ++{ ++ rand_seed ^= entropy; ++ crasher_random(); ++} ++ ++static char *mem_alloc(int size) { ++ char *p = kmalloc(size, GFP_KERNEL); ++ int i; ++ if (!p) ++ return p; ++ for (i = 0 ; i < size; i++) ++ p[i] = (i % 119) + 8; ++ return p; ++} ++ ++static void mem_check(char *p, int size) { ++ int i; ++ if (!p) ++ return; ++ for (i = 0 ; i < size; i++) { ++ if (p[i] != ((i % 119) + 8)) { ++ printk(KERN_CRIT "verify error at %lX offset %d " ++ " wanted %d found %d size %d\n", ++ (unsigned long)(p + i), i, (i % 119) + 8, ++ p[i], size); ++ } ++ } ++ // try and trigger slab poisoning for people using this buffer ++ // wrong ++ memset(p, 0, size); ++} ++ ++static void mem_verify(void) { ++ struct mem_buf bufs[NUM_ALLOC]; ++ struct mem_buf *b; ++ int index; ++ int size; ++ unsigned long sleep; ++ memset(bufs, 0, sizeof(struct mem_buf) * NUM_ALLOC); ++ while(!module_exiting) { ++ index = crasher_random() % NUM_ALLOC; ++ b = bufs + index; ++ if (b->size) { ++ mem_check(b->buf, b->size); ++ 
kfree(b->buf); ++ b->buf = NULL; ++ b->size = 0; ++ } else { ++ size = crasher_random() % NUM_SIZES; ++ size = sizes[size]; ++ b->buf = mem_alloc(size); ++ b->size = size; ++ } ++ sleep = crasher_random() % (HZ / 10); ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(sleep); ++ set_current_state(TASK_RUNNING); ++ } ++ for (index = 0 ; index < NUM_ALLOC ; index++) { ++ b = bufs + index; ++ if (b->size) { ++ mem_check(b->buf, b->size); ++ kfree(b->buf); ++ } ++ } ++} ++ ++static int crasher_thread(void *unused) ++{ ++ daemonize("crasher"); ++ complete(&startup); ++ mem_verify(); ++ complete(&startup); ++ return 0; ++} ++ ++static int __init crasher_init(void) ++{ ++ int i; ++ init_completion(&startup); ++ crasher_srandom(seed); ++ ++ if (call_panic) { ++ panic("test panic from crasher module. Good Luck.\n"); ++ return -EFAULT; ++ } ++ if (call_bug) { ++ printk("triggering BUG\n"); ++ BUG_ON(1); ++ return -EFAULT; ++ } ++ ++ if (trap_null) { ++ volatile char *p = NULL; ++ printk("dereferencing NULL pointer.\n"); ++ p[0] = '\n'; ++ return -EFAULT; ++ } ++ if (trap_read) { ++ const volatile char *p = (char *)trap_read; ++ printk("reading from invalid(?) address %p.\n", p); ++ return p[0] ? -EFAULT : -EACCES; ++ } ++ if (trap_write) { ++ volatile char *p = (char *)trap_write; ++ printk("writing to invalid(?) address %p.\n", p); ++ p[0] = ' '; ++ return -EFAULT; ++ } ++ ++ if (call_null) { ++ void(*f)(void) = NULL; ++ printk("calling NULL pointer.\n"); ++ f(); ++ return -EFAULT; ++ } ++ if (call_bad) { ++ void(*f)(void) = (void(*)(void))call_bad; ++ printk("calling invalid(?) address %p.\n", f); ++ f(); ++ return -EFAULT; ++ } ++ ++ /* These two depend on the compiler doing tail call optimization. */ ++ if (jump_null) { ++ int(*f)(void) = NULL; ++ printk("jumping to NULL.\n"); ++ return f(); ++ } ++ if (jump_bad) { ++ int(*f)(void) = (int(*)(void))jump_bad; ++ printk("jumping to invalid(?) 
address %p.\n", f); ++ return f(); ++ } ++ ++ printk("crasher module (%d threads). Testing sizes: ", threads); ++ for (i = 0 ; i < NUM_SIZES ; i++) ++ printk("%d ", sizes[i]); ++ printk("\n"); ++ ++ for (i = 0 ; i < threads ; i++) ++ kernel_thread(crasher_thread, crasher_thread, ++ CLONE_FS | CLONE_FILES); ++ for (i = 0 ; i < threads ; i++) ++ wait_for_completion(&startup); ++ return 0; ++} ++ ++static void __exit crasher_exit(void) ++{ ++ int i; ++ module_exiting = 1; ++ for (i = 0 ; i < threads ; i++) ++ wait_for_completion(&startup); ++ printk("all crasher threads done\n"); ++ return; ++} ++ ++module_init(crasher_init); ++module_exit(crasher_exit); diff --git a/src/patches/suse-2.6.27.25/patches.suse/file-capabilities-add-file_caps-switch.diff b/src/patches/suse-2.6.27.25/patches.suse/file-capabilities-add-file_caps-switch.diff new file mode 100644 index 0000000000..e12aba6ffe --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/file-capabilities-add-file_caps-switch.diff @@ -0,0 +1,106 @@ +From: Andreas Gruenbacher +Subject: file capabilities: add file_caps switch + +Based on a patch from Serge Hallyn : + +Add a file_caps boot option when file capabilities are +compiled into the kernel (CONFIG_SECURITY_FILE_CAPABILITIES=y). + +This allows distributions to ship a kernel with file capabilities +compiled in, without forcing users to use (and understand and +trust) them. + +When file_caps=0 is specified at boot, then when a process executes +a file, any file capabilities stored with that file will not be +used in the calculation of the process' new capability sets. 
+ +This means that booting with the file_caps=0 boot option will +not be the same as booting a kernel with file capabilities +compiled out - in particular a task with CAP_SETPCAP will not +have any chance of passing capabilities to another task (which +isn't "really" possible anyway, and which may soon by killed +altogether by David Howells in any case), and it will instead +be able to put new capabilities in its pI. However since fI +will always be empty and pI is masked with fI, it gains the +task nothing. + +We also support the extra prctl options, setting securebits and +dropping capabilities from the per-process bounding set. + +The other remaining difference is that killpriv, task_setscheduler, +setioprio, and setnice will continue to be hooked. That will +be noticable in the case where a root task changed its uid +while keeping some caps, and another task owned by the new uid +tries to change settings for the more privileged task. + +Signed-off-by: Andreas Gruenbacher + +--- + Documentation/kernel-parameters.txt | 9 +++++++++ + include/linux/capability.h | 3 +++ + kernel/capability.c | 11 +++++++++++ + security/commoncap.c | 3 +++ + 4 files changed, 26 insertions(+) + +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -1396,6 +1396,15 @@ and is between 256 and 4096 characters. + instruction doesn't work correctly and not to + use it. + ++ file_caps= Tells the kernel whether to honor file capabilities. ++ When disabled, the only way then for a file to be ++ executed with privilege is to be setuid root or executed ++ by root. ++ Format: {"0" | "1"} ++ 0 -- ignore file capabilities. ++ 1 -- honor file capabilities. ++ Default value is 1. ++ + nohalt [IA-64] Tells the kernel not to use the power saving + function PAL_HALT_LIGHT when idle. This increases + power-consumption. 
On the positive side, it reduces +--- a/include/linux/capability.h ++++ b/include/linux/capability.h +@@ -68,6 +68,9 @@ typedef struct __user_cap_data_struct { + #define VFS_CAP_U32 VFS_CAP_U32_2 + #define VFS_CAP_REVISION VFS_CAP_REVISION_2 + ++#ifdef CONFIG_SECURITY_FILE_CAPABILITIES ++extern int file_caps_enabled; ++#endif + + struct vfs_cap_data { + __le32 magic_etc; /* Little endian */ +--- a/kernel/capability.c ++++ b/kernel/capability.c +@@ -33,6 +33,17 @@ EXPORT_SYMBOL(__cap_empty_set); + EXPORT_SYMBOL(__cap_full_set); + EXPORT_SYMBOL(__cap_init_eff_set); + ++#ifdef CONFIG_SECURITY_FILE_CAPABILITIES ++int file_caps_enabled = 1; ++ ++static int __init setup_file_caps(char *str) ++{ ++ get_option(&str, &file_caps_enabled); ++ return 1; ++} ++__setup("file_caps=", setup_file_caps); ++#endif ++ + /* + * More recent versions of libcap are available from: + * +--- a/security/commoncap.c ++++ b/security/commoncap.c +@@ -281,6 +281,9 @@ static int get_file_caps(struct linux_bi + + bprm_clear_caps(bprm); + ++ if (!file_caps_enabled) ++ return 0; ++ + if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) + return 0; + diff --git a/src/patches/suse-2.6.27.25/patches.suse/file-capabilities-disable-by-default.diff b/src/patches/suse-2.6.27.25/patches.suse/file-capabilities-disable-by-default.diff new file mode 100644 index 0000000000..e315606820 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/file-capabilities-disable-by-default.diff @@ -0,0 +1,35 @@ +From: Andreas Gruenbacher +Subject: Disable file capabilities by default + +Disable file capabilities by default: we are still lacking documentation +and file capability awareness in system management tools. 
+ +Signed-off-by: Andreas Gruenbacher + +--- + Documentation/kernel-parameters.txt | 2 +- + kernel/capability.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -1403,7 +1403,7 @@ and is between 256 and 4096 characters. + Format: {"0" | "1"} + 0 -- ignore file capabilities. + 1 -- honor file capabilities. +- Default value is 1. ++ Default value is 0. + + nohalt [IA-64] Tells the kernel not to use the power saving + function PAL_HALT_LIGHT when idle. This increases +--- a/kernel/capability.c ++++ b/kernel/capability.c +@@ -34,7 +34,7 @@ EXPORT_SYMBOL(__cap_full_set); + EXPORT_SYMBOL(__cap_init_eff_set); + + #ifdef CONFIG_SECURITY_FILE_CAPABILITIES +-int file_caps_enabled = 1; ++int file_caps_enabled; + + static int __init setup_file_caps(char *str) + { diff --git a/src/patches/suse-2.6.27.25/patches.suse/fs-knows-MAY_APPEND.diff b/src/patches/suse-2.6.27.25/patches.suse/fs-knows-MAY_APPEND.diff new file mode 100644 index 0000000000..3f65c630d7 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/fs-knows-MAY_APPEND.diff @@ -0,0 +1,59 @@ +From: Andreas Gruenbacher +Subject: Allow filesystems to handle MAY_APPEND + +The MS_WITHAPPEND super_block flag tells the vfs that the permission +inode operation understands the MAY_APPEND flag. This is required for +implementing permission models which go beyond the traditional UNIX +semantics. + +If a filesystem does not set the flag, the behavior is unchanged. 
+ +Signed-off-by: Andreas Gruenbacher + +--- + fs/namei.c | 6 +++++- + include/linux/fs.h | 2 ++ + 2 files changed, 7 insertions(+), 1 deletion(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -230,6 +230,7 @@ int generic_permission(struct inode *ino + int inode_permission(struct inode *inode, int mask) + { + int retval; ++ int submask = mask; + + if (mask & MAY_WRITE) { + umode_t mode = inode->i_mode; +@@ -248,9 +249,12 @@ int inode_permission(struct inode *inode + return -EACCES; + } + ++ if (!IS_WITHAPPEND(inode)) ++ submask &= ~MAY_APPEND; ++ + /* Ordinary permission routines do not understand MAY_APPEND. */ + if (inode->i_op && inode->i_op->permission) { +- retval = inode->i_op->permission(inode, mask); ++ retval = inode->i_op->permission(inode, submask); + if (!retval) { + /* + * Exec permission on a regular file is denied if none +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -133,6 +133,7 @@ extern int dir_notify_enable; + #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. 
*/ + #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ + #define MS_I_VERSION (1<<23) /* Update inode I_version field */ ++#define MS_WITHAPPEND (1<<24) /* iop->permission() understands MAY_APPEND */ + #define MS_ACTIVE (1<<30) + #define MS_NOUSER (1<<31) + +@@ -183,6 +184,7 @@ extern int dir_notify_enable; + #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) + #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) + #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) ++#define IS_WITHAPPEND(inode) __IS_FLG(inode, MS_WITHAPPEND) + + #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) + #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) diff --git a/src/patches/suse-2.6.27.25/patches.suse/fs-may_iops.diff b/src/patches/suse-2.6.27.25/patches.suse/fs-may_iops.diff new file mode 100644 index 0000000000..9ee1ae62ea --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/fs-may_iops.diff @@ -0,0 +1,144 @@ +From: Andreas Gruenbacher +Subject: VFS hooks for per-filesystem permission models + +Add may_create and may_delete inode operations that filesystems can +implement in order to override the vfs provided default behavior. +This is required for implementing permission models which go beyond +the traditional UNIX semantics. + +If a filesystem does not implement these hooks, the behavior is +unchanged. 
+ +Signed-off-by: Andreas Gruenbacher + +--- + fs/namei.c | 48 +++++++++++++++++++++++++++++++++++++----------- + include/linux/fs.h | 2 ++ + 2 files changed, 39 insertions(+), 11 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -1402,13 +1402,24 @@ static int may_delete(struct inode *dir, + BUG_ON(victim->d_parent->d_inode != dir); + audit_inode_child(victim->d_name.name, victim, dir); + +- error = inode_permission(dir, MAY_WRITE | MAY_EXEC); ++ if (dir->i_op->may_delete) { ++ if (IS_RDONLY(dir)) ++ return -EROFS; ++ if (IS_IMMUTABLE(dir)) ++ return -EACCES; ++ error = dir->i_op->may_delete(dir, victim->d_inode); ++ if (!error) ++ error = security_inode_permission(dir, MAY_WRITE | MAY_EXEC); ++ } else { ++ error = inode_permission(dir, MAY_WRITE | MAY_EXEC); ++ if (!error && check_sticky(dir, victim->d_inode)) ++ error = -EPERM; ++ } + if (error) + return error; + if (IS_APPEND(dir)) + return -EPERM; +- if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| +- IS_IMMUTABLE(victim->d_inode)) ++ if (IS_APPEND(victim->d_inode) || IS_IMMUTABLE(victim->d_inode)) + return -EPERM; + if (isdir) { + if (!S_ISDIR(victim->d_inode->i_mode)) +@@ -1432,13 +1443,28 @@ static int may_delete(struct inode *dir, + * 3. We should have write and exec permissions on dir + * 4. 
We can't do it if dir is immutable (done in permission()) + */ +-static inline int may_create(struct inode *dir, struct dentry *child) ++static inline int may_create(struct inode *dir, struct dentry *child, ++ int isdir) + { ++ int error; ++ + if (child->d_inode) + return -EEXIST; + if (IS_DEADDIR(dir)) + return -ENOENT; +- return inode_permission(dir, MAY_WRITE | MAY_EXEC); ++ ++ if (dir->i_op->may_create) { ++ if (IS_RDONLY(dir)) ++ return -EROFS; ++ if (IS_IMMUTABLE(dir)) ++ return -EACCES; ++ error = dir->i_op->may_create(dir, isdir); ++ if (!error) ++ error = security_inode_permission(dir, MAY_WRITE | MAY_EXEC); ++ } else ++ error = inode_permission(dir, MAY_WRITE | MAY_EXEC); ++ ++ return error; + } + + /* +@@ -1504,7 +1530,7 @@ void unlock_rename(struct dentry *p1, st + int vfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) + { +- int error = may_create(dir, dentry); ++ int error = may_create(dir, dentry, 0); + + if (error) + return error; +@@ -1948,7 +1974,7 @@ EXPORT_SYMBOL_GPL(lookup_create); + + int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) + { +- int error = may_create(dir, dentry); ++ int error = may_create(dir, dentry, 0); + + if (error) + return error; +@@ -2049,7 +2075,7 @@ SYSCALL_DEFINE3(mknod, const char __user + + int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) + { +- int error = may_create(dir, dentry); ++ int error = may_create(dir, dentry, 1); + + if (error) + return error; +@@ -2316,7 +2342,7 @@ SYSCALL_DEFINE1(unlink, const char __use + + int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) + { +- int error = may_create(dir, dentry); ++ int error = may_create(dir, dentry, 0); + + if (error) + return error; +@@ -2386,7 +2412,7 @@ int vfs_link(struct dentry *old_dentry, + if (!inode) + return -ENOENT; + +- error = may_create(dir, new_dentry); ++ error = may_create(dir, new_dentry, S_ISDIR(inode->i_mode)); + if (error) + return error; 
+ +@@ -2594,7 +2620,7 @@ int vfs_rename(struct inode *old_dir, st + return error; + + if (!new_dentry->d_inode) +- error = may_create(new_dir, new_dentry); ++ error = may_create(new_dir, new_dentry, is_dir); + else + error = may_delete(new_dir, new_dentry, is_dir); + if (error) +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1293,6 +1293,8 @@ struct inode_operations { + void (*put_link) (struct dentry *, struct nameidata *, void *); + void (*truncate) (struct inode *); + int (*permission) (struct inode *, int); ++ int (*may_create) (struct inode *, int); ++ int (*may_delete) (struct inode *, struct inode *); + int (*setattr) (struct dentry *, struct iattr *); + int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); diff --git a/src/patches/suse-2.6.27.25/patches.suse/genksyms-add-override-flag.diff b/src/patches/suse-2.6.27.25/patches.suse/genksyms-add-override-flag.diff new file mode 100644 index 0000000000..c0a2ea50ea --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/genksyms-add-override-flag.diff @@ -0,0 +1,126 @@ +From: Andreas Gruenbacher +Subject: genksyms: add --override flag + +Add --override flag to genksyms to allow overriding types with old +definitions using the 'override' keyword. 
This is similar to -p --preserve, +but it doesn't abort the build if a symtype cannot be preserved + +[mmarek: added KBUILD_OVERRIDE env var to set this globally for the entire + build] +--- + scripts/genksyms/genksyms.c | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +Index: b/scripts/genksyms/genksyms.c +=================================================================== +--- a/scripts/genksyms/genksyms.c ++++ b/scripts/genksyms/genksyms.c +@@ -43,7 +43,7 @@ int cur_line = 1; + char *cur_filename; + + static int flag_debug, flag_dump_defs, flag_reference, flag_dump_types, +- flag_preserve, flag_warnings; ++ flag_override, flag_preserve, flag_warnings; + static const char *arch = ""; + static const char *mod_prefix = ""; + +@@ -200,7 +200,7 @@ struct symbol *__add_symbol(const char * + sym->is_declared = 1; + return sym; + } else if (!sym->is_declared) { +- if (sym->is_override && flag_preserve) { ++ if (sym->is_override && flag_override) { + print_location(); + fprintf(stderr, "ignoring "); + print_type_name(type, name); +@@ -586,11 +586,13 @@ void export_symbol(const char *name) + struct symbol *n = sym->expansion_trail; + + if (sym->status != STATUS_UNCHANGED) { ++ int fail = sym->is_override && flag_preserve; ++ + if (!has_changed) { + print_location(); + fprintf(stderr, "%s: %s: modversion " + "changed because of changes " +- "in ", flag_preserve ? "error" : ++ "in ", fail ? 
"error" : + "warning", name); + } else + fprintf(stderr, ", "); +@@ -598,7 +600,7 @@ void export_symbol(const char *name) + if (sym->status == STATUS_DEFINED) + fprintf(stderr, " (became defined)"); + has_changed = 1; +- if (flag_preserve) ++ if (fail) + errors++; + } + sym->expansion_trail = 0; +@@ -655,6 +657,7 @@ static void genksyms_usage(void) + " -D, --dump Dump expanded symbol defs (for debugging only)\n" + " -r, --reference file Read reference symbols from a file\n" + " -T, --dump-types file Dump expanded types into file\n" ++ " -o, --override Allow to override reference modversions\n" + " -p, --preserve Preserve reference modversions or fail\n" + " -w, --warnings Enable warnings\n" + " -q, --quiet Disable warnings (default)\n" +@@ -666,6 +669,7 @@ static void genksyms_usage(void) + " -D Dump expanded symbol defs (for debugging only)\n" + " -r file Read reference symbols from a file\n" + " -T file Dump expanded types into file\n" ++ " -o Allow to override reference modversions\n" + " -p Preserve reference modversions or fail\n" + " -w Enable warnings\n" + " -q Disable warnings (default)\n" +@@ -690,15 +694,16 @@ int main(int argc, char **argv) + {"reference", 1, 0, 'r'}, + {"dump-types", 1, 0, 'T'}, + {"preserve", 0, 0, 'p'}, ++ {"override", 0, 0, 'o'}, + {"version", 0, 0, 'V'}, + {"help", 0, 0, 'h'}, + {0, 0, 0, 0} + }; + +- while ((o = getopt_long(argc, argv, "a:dwqVDr:T:ph", ++ while ((o = getopt_long(argc, argv, "a:dwqVDr:T:oph", + &long_opts[0], NULL)) != EOF) + #else /* __GNU_LIBRARY__ */ +- while ((o = getopt(argc, argv, "a:dwqVDr:T:ph")) != EOF) ++ while ((o = getopt(argc, argv, "a:dwqVDr:T:oph")) != EOF) + #endif /* __GNU_LIBRARY__ */ + switch (o) { + case 'a': +@@ -735,7 +740,11 @@ int main(int argc, char **argv) + return 1; + } + break; ++ case 'o': ++ flag_override = 1; ++ break; + case 'p': ++ flag_override = 1; + flag_preserve = 1; + break; + case 'h': +Index: b/scripts/Makefile.build 
+=================================================================== +--- a/scripts/Makefile.build ++++ b/scripts/Makefile.build +@@ -159,6 +159,7 @@ cmd_cc_symtypes_c = \ + -r $(firstword $(wildcard \ + $(@:.symtypes=.symref) /dev/null)) \ + $(if $(KBUILD_PRESERVE),-p) \ ++ $(if $(KBUILD_OVERRIDE),-o) \ + -a $(ARCH) \ + >/dev/null; \ + test -s $@ || rm -f $@ +@@ -197,6 +198,7 @@ cmd_modversions = \ + -r $(firstword $(wildcard \ + $(@:.o=.symref) /dev/null)) \ + $(if $(KBUILD_PRESERVE),-p) \ ++ $(if $(KBUILD_OVERRIDE),-o) \ + -a $(ARCH) \ + > $(@D)/.tmp_$(@F:.o=.ver); \ + \ diff --git a/src/patches/suse-2.6.27.25/patches.suse/genksyms-override.diff b/src/patches/suse-2.6.27.25/patches.suse/genksyms-override.diff new file mode 100644 index 0000000000..1b77b4a36f --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/genksyms-override.diff @@ -0,0 +1,110 @@ +From: Andreas Gruenbacher +Subject: genksyms: allow to ignore symbol checksum changes + +This adds an "override" keyword for use in *.symvers / *.symref files. When a +symbol is overridden, the symbol's old definition will be used for computing +checksums instead of the new one, preserving the previous checksum. (Genksyms +will still warn about the change.) + +This is meant to allow distributions to hide minor actual as well as fake ABI +changes. (For example, when extra type information becomes available because +additional headers are included, this may change checksums even though none of +the types used have actully changed.) + +This approach also allows to get rid of "#ifdef __GENKSYMS__" hacks in the code, +which are currently used in some vendor kernels to work around checksum changes. 
+ +Signed-off-by: Andreas Gruenbacher + +--- + scripts/genksyms/genksyms.c | 34 ++++++++++++++++++++++++++++++---- + scripts/genksyms/genksyms.h | 1 + + 2 files changed, 31 insertions(+), 4 deletions(-) + +--- a/scripts/genksyms/genksyms.c ++++ b/scripts/genksyms/genksyms.c +@@ -191,11 +191,26 @@ struct symbol *__add_symbol(const char * + /* fall through */ ; + else if (sym->type == type && + equal_list(sym->defn, defn)) { ++ if (!sym->is_declared && sym->is_override) { ++ print_location(); ++ print_type_name(type, name); ++ fprintf(stderr, " modversion is " ++ "unchanged\n"); ++ } + sym->is_declared = 1; + return sym; + } else if (!sym->is_declared) { +- status = is_unknown_symbol(sym) ? +- STATUS_DEFINED : STATUS_MODIFIED; ++ if (sym->is_override && flag_preserve) { ++ print_location(); ++ fprintf(stderr, "ignoring "); ++ print_type_name(type, name); ++ fprintf(stderr, " modversion change\n"); ++ sym->is_declared = 1; ++ return sym; ++ } else { ++ status = is_unknown_symbol(sym) ? ++ STATUS_DEFINED : STATUS_MODIFIED; ++ } + } else { + error_with_pos("redefinition of %s", name); + return sym; +@@ -229,6 +244,7 @@ struct symbol *__add_symbol(const char * + + sym->is_declared = !is_reference; + sym->status = status; ++ sym->is_override = 0; + + if (flag_debug) { + fprintf(debugfile, "Defn for %s %s == <", +@@ -348,9 +364,16 @@ static void read_reference(FILE *f) + while (!feof(f)) { + struct string_list *defn = NULL; + struct string_list *sym, *def; +- int is_extern = 0; ++ int is_extern = 0, is_override = 0; ++ struct symbol *subsym; + + sym = read_node(f); ++ if (sym && sym->tag == SYM_NORMAL && ++ !strcmp(sym->string, "override")) { ++ is_override = 1; ++ free_node(sym); ++ sym = read_node(f); ++ } + if (!sym) + continue; + def = read_node(f); +@@ -365,8 +388,9 @@ static void read_reference(FILE *f) + defn = def; + def = read_node(f); + } +- add_reference_symbol(xstrdup(sym->string), sym->tag, ++ subsym = add_reference_symbol(xstrdup(sym->string), sym->tag, + 
defn, is_extern); ++ subsym->is_override = is_override; + free_node(sym); + } + } +@@ -743,6 +767,8 @@ int main(int argc, char **argv) + while (visited_symbols != (struct symbol *)-1L) { + struct symbol *sym = visited_symbols; + ++ if (sym->is_override) ++ fputs("override ", dumpfile); + if (sym->type != SYM_NORMAL) { + putc(symbol_type_name[sym->type][0], dumpfile); + putc('#', dumpfile); +--- a/scripts/genksyms/genksyms.h ++++ b/scripts/genksyms/genksyms.h +@@ -49,6 +49,7 @@ struct symbol { + int is_extern; + int is_declared; + enum symbol_status status; ++ int is_override; + }; + + typedef struct string_list **yystype; diff --git a/src/patches/suse-2.6.27.25/patches.suse/genksyms-reference.diff b/src/patches/suse-2.6.27.25/patches.suse/genksyms-reference.diff new file mode 100644 index 0000000000..9f440b982c --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/genksyms-reference.diff @@ -0,0 +1,484 @@ +From: Andreas Gruenbacher +Subject: genksyms: track symbol checksum changes + +Sometimes it is preferable to avoid changes of exported symbol checksums (to +avoid breaking externally provided modules). When a checksum change occurs, it +can be hard to figure out what caused this change: underlying types may have +changed, or additional type information may simply have become available at the +point where a symbol is exported. + +Add a new --reference option to genksyms which allows it to report why +checksums change, based on the type information dumps it creates with +the --dump-types flag. Genksyms will read in such a dump from a previous run, +and report which symbols have changed (and why). + +The behavior can be controlled for an entire build as follows: If +KBUILD_SYMTYPES is set, genksyms uses --dump-types to produce *.symtypes dump +files. If any *.symref files exist, those will be used as the reference to +check against. If KBUILD_PRESERVE is set, checksum changes will fail the +build. 
+ +Signed-off-by: Andreas Gruenbacher + +--- + scripts/Makefile.build | 16 ++ + scripts/genksyms/genksyms.c | 236 +++++++++++++++++++++++++++++++++++++++++--- + scripts/genksyms/genksyms.h | 6 + + 3 files changed, 239 insertions(+), 19 deletions(-) + +--- a/scripts/Makefile.build ++++ b/scripts/Makefile.build +@@ -153,12 +153,18 @@ $(obj)/%.i: $(src)/%.c FORCE + + quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@ + cmd_cc_symtypes_c = \ ++ set -e; \ + $(CPP) -D__GENKSYMS__ $(c_flags) $< \ +- | $(GENKSYMS) -T $@ >/dev/null; \ ++ | $(GENKSYMS) -T $@ \ ++ -r $(firstword $(wildcard \ ++ $(@:.symtypes=.symref) /dev/null)) \ ++ $(if $(KBUILD_PRESERVE),-p) \ ++ -a $(ARCH) \ ++ >/dev/null; \ + test -s $@ || rm -f $@ + + $(obj)/%.symtypes : $(src)/%.c FORCE +- $(call if_changed_dep,cc_symtypes_c) ++ $(call cmd,cc_symtypes_c) + + # C (.c) files + # The C file is compiled and updated dependency information is generated. +@@ -187,7 +193,11 @@ cmd_modversions = \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + $(CPP) -D__GENKSYMS__ $(c_flags) $< \ + | $(GENKSYMS) $(if $(KBUILD_SYMTYPES), \ +- -T $(@D)/$(@F:.o=.symtypes)) -a $(ARCH) \ ++ -T $(@:.o=.symtypes)) \ ++ -r $(firstword $(wildcard \ ++ $(@:.o=.symref) /dev/null)) \ ++ $(if $(KBUILD_PRESERVE),-p) \ ++ -a $(ARCH) \ + > $(@D)/.tmp_$(@F:.o=.ver); \ + \ + $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \ +--- a/scripts/genksyms/genksyms.c ++++ b/scripts/genksyms/genksyms.c +@@ -42,7 +42,8 @@ static FILE *debugfile; + int cur_line = 1; + char *cur_filename; + +-static int flag_debug, flag_dump_defs, flag_dump_types, flag_warnings; ++static int flag_debug, flag_dump_defs, flag_reference, flag_dump_types, ++ flag_preserve, flag_warnings; + static const char *arch = ""; + static const char *mod_prefix = ""; + +@@ -58,6 +59,8 @@ static const char *const symbol_type_nam + + static int equal_list(struct string_list *a, struct string_list *b); + static void print_list(FILE * f, struct string_list *list); ++static void 
print_location(void); ++static void print_type_name(enum symbol_type type, const char *name); + + /*----------------------------------------------------------------------*/ + +@@ -151,25 +154,66 @@ struct symbol *find_symbol(const char *n + + for (sym = symtab[h]; sym; sym = sym->hash_next) + if (map_to_ns(sym->type) == map_to_ns(ns) && +- strcmp(name, sym->name) == 0) ++ strcmp(name, sym->name) == 0 && ++ sym->is_declared) + break; + + return sym; + } + +-struct symbol *add_symbol(const char *name, enum symbol_type type, +- struct string_list *defn, int is_extern) ++static int is_unknown_symbol(struct symbol *sym) ++{ ++ struct string_list *defn; ++ ++ return ((sym->type == SYM_STRUCT || ++ sym->type == SYM_UNION || ++ sym->type == SYM_ENUM) && ++ (defn = sym->defn) && defn->tag == SYM_NORMAL && ++ strcmp(defn->string, "}") == 0 && ++ (defn = defn->next) && defn->tag == SYM_NORMAL && ++ strcmp(defn->string, "UNKNOWN") == 0 && ++ (defn = defn->next) && defn->tag == SYM_NORMAL && ++ strcmp(defn->string, "{") == 0); ++} ++ ++struct symbol *__add_symbol(const char *name, enum symbol_type type, ++ struct string_list *defn, int is_extern, ++ int is_reference) + { + unsigned long h = crc32(name) % HASH_BUCKETS; + struct symbol *sym; ++ enum symbol_status status = STATUS_UNCHANGED; + + for (sym = symtab[h]; sym; sym = sym->hash_next) { +- if (map_to_ns(sym->type) == map_to_ns(type) +- && strcmp(name, sym->name) == 0) { +- if (!equal_list(sym->defn, defn)) ++ if (map_to_ns(sym->type) == map_to_ns(type) && ++ strcmp(name, sym->name) == 0) { ++ if (is_reference) ++ /* fall through */ ; ++ else if (sym->type == type && ++ equal_list(sym->defn, defn)) { ++ sym->is_declared = 1; ++ return sym; ++ } else if (!sym->is_declared) { ++ status = is_unknown_symbol(sym) ? 
++ STATUS_DEFINED : STATUS_MODIFIED; ++ } else { + error_with_pos("redefinition of %s", name); +- return sym; ++ return sym; ++ } ++ break; ++ } ++ } ++ ++ if (sym) { ++ struct symbol **psym; ++ ++ for (psym = &symtab[h]; *psym; psym = &(*psym)->hash_next) { ++ if (*psym == sym) { ++ *psym = sym->hash_next; ++ break; ++ } + } ++ --nsyms; + } + + sym = xmalloc(sizeof(*sym)); +@@ -183,6 +227,9 @@ struct symbol *add_symbol(const char *na + sym->hash_next = symtab[h]; + symtab[h] = sym; + ++ sym->is_declared = !is_reference; ++ sym->status = status; ++ + if (flag_debug) { + fprintf(debugfile, "Defn for %s %s == <", + symbol_type_name[type], name); +@@ -196,6 +243,18 @@ struct symbol *add_symbol(const char *na + return sym; + } + ++struct symbol *add_symbol(const char *name, enum symbol_type type, ++ struct string_list *defn, int is_extern) ++{ ++ return __add_symbol(name, type, defn, is_extern, 0); ++} ++ ++struct symbol *add_reference_symbol(const char *name, enum symbol_type type, ++ struct string_list *defn, int is_extern) ++{ ++ return __add_symbol(name, type, defn, is_extern, 1); ++} ++ + /*----------------------------------------------------------------------*/ + + void free_node(struct string_list *node) +@@ -236,6 +295,82 @@ static int equal_list(struct string_list + return !a && !b; + } + ++#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) ++ ++struct string_list *read_node(FILE *f) ++{ ++ char buffer[256]; ++ struct string_list node = { ++ .string = buffer, ++ .tag = SYM_NORMAL }; ++ int c; ++ ++ while ((c = fgetc(f)) != EOF) { ++ if (c == ' ') { ++ if (node.string == buffer) ++ continue; ++ break; ++ } else if (c == '\n') { ++ if (node.string == buffer) ++ return NULL; ++ ungetc(c, f); ++ break; ++ } ++ if (node.string >= buffer + sizeof(buffer) - 1) { ++ fprintf(stderr, "Token too long\n"); ++ exit(1); ++ } ++ *node.string++ = c; ++ } ++ if (node.string == buffer) ++ return NULL; ++ *node.string = 0; ++ node.string = buffer; ++ ++ if (node.string[1] 
== '#') { ++ int n; ++ ++ for (n = 0; n < ARRAY_SIZE(symbol_type_name); n++) { ++ if (node.string[0] == symbol_type_name[n][0]) { ++ node.tag = n; ++ node.string += 2; ++ return copy_node(&node); ++ } ++ } ++ fprintf(stderr, "Unknown type %c\n", node.string[0]); ++ exit(1); ++ } ++ return copy_node(&node); ++} ++ ++static void read_reference(FILE *f) ++{ ++ while (!feof(f)) { ++ struct string_list *defn = NULL; ++ struct string_list *sym, *def; ++ int is_extern = 0; ++ ++ sym = read_node(f); ++ if (!sym) ++ continue; ++ def = read_node(f); ++ if (def && def->tag == SYM_NORMAL && ++ !strcmp(def->string, "extern")) { ++ is_extern = 1; ++ free_node(def); ++ def = read_node(f); ++ } ++ while (def) { ++ def->next = defn; ++ defn = def; ++ def = read_node(f); ++ } ++ add_reference_symbol(xstrdup(sym->string), sym->tag, ++ defn, is_extern); ++ free_node(sym); ++ } ++} ++ + static void print_node(FILE * f, struct string_list *list) + { + if (list->tag != SYM_NORMAL) { +@@ -311,6 +446,7 @@ static unsigned long expand_and_crc_sym( + + case SYM_TYPEDEF: + subsym = find_symbol(cur->string, cur->tag); ++ /* FIXME: Bad reference files can segfault here. 
*/ + if (subsym->expansion_trail) { + if (flag_dump_defs) + fprintf(debugfile, "%s ", cur->string); +@@ -347,9 +483,22 @@ static unsigned long expand_and_crc_sym( + t = n; + + n = xmalloc(sizeof(*n)); +- n->string = xstrdup("{ UNKNOWN }"); ++ n->string = xstrdup("{"); ++ n->tag = SYM_NORMAL; ++ n->next = t; ++ t = n; ++ ++ n = xmalloc(sizeof(*n)); ++ n->string = xstrdup("UNKNOWN"); ++ n->tag = SYM_NORMAL; ++ n->next = t; ++ t = n; ++ ++ n = xmalloc(sizeof(*n)); ++ n->string = xstrdup("}"); + n->tag = SYM_NORMAL; + n->next = t; ++ t = n; + + subsym = + add_symbol(cur->string, cur->tag, n, 0); +@@ -397,20 +546,42 @@ void export_symbol(const char *name) + error_with_pos("export undefined symbol %s", name); + else { + unsigned long crc; ++ int has_changed = 0; + + if (flag_dump_defs) + fprintf(debugfile, "Export %s == <", name); + + expansion_trail = (struct symbol *)-1L; + ++ sym->expansion_trail = expansion_trail; ++ expansion_trail = sym; + crc = expand_and_crc_sym(sym, 0xffffffff) ^ 0xffffffff; + + sym = expansion_trail; + while (sym != (struct symbol *)-1L) { + struct symbol *n = sym->expansion_trail; ++ ++ if (sym->status != STATUS_UNCHANGED) { ++ if (!has_changed) { ++ print_location(); ++ fprintf(stderr, "%s: %s: modversion " ++ "changed because of changes " ++ "in ", flag_preserve ? "error" : ++ "warning", name); ++ } else ++ fprintf(stderr, ", "); ++ print_type_name(sym->type, sym->name); ++ if (sym->status == STATUS_DEFINED) ++ fprintf(stderr, " (became defined)"); ++ has_changed = 1; ++ if (flag_preserve) ++ errors++; ++ } + sym->expansion_trail = 0; + sym = n; + } ++ if (has_changed) ++ fprintf(stderr, "\n"); + + if (flag_dump_defs) + fputs(">\n", debugfile); +@@ -421,13 +592,26 @@ void export_symbol(const char *name) + } + + /*----------------------------------------------------------------------*/ ++ ++static void print_location(void) ++{ ++ fprintf(stderr, "%s:%d: ", cur_filename ? 
: "", cur_line); ++} ++ ++static void print_type_name(enum symbol_type type, const char *name) ++{ ++ if (type != SYM_NORMAL) ++ fprintf(stderr, "%s %s", symbol_type_name[type], name); ++ else ++ fprintf(stderr, "%s", name); ++} ++ + void error_with_pos(const char *fmt, ...) + { + va_list args; + + if (flag_warnings) { +- fprintf(stderr, "%s:%d: ", cur_filename ? : "", +- cur_line); ++ print_location(); + + va_start(args, fmt); + vfprintf(stderr, fmt, args); +@@ -445,7 +629,9 @@ static void genksyms_usage(void) + " -a, --arch Select architecture\n" + " -d, --debug Increment the debug level (repeatable)\n" + " -D, --dump Dump expanded symbol defs (for debugging only)\n" +- " -T, --dump-types file Dump expanded types into file (for debugging only)\n" ++ " -r, --reference file Read reference symbols from a file\n" ++ " -T, --dump-types file Dump expanded types into file\n" ++ " -p, --preserve Preserve reference modversions or fail\n" + " -w, --warnings Enable warnings\n" + " -q, --quiet Disable warnings (default)\n" + " -h, --help Print this message\n" +@@ -454,7 +640,9 @@ static void genksyms_usage(void) + " -a Select architecture\n" + " -d Increment the debug level (repeatable)\n" + " -D Dump expanded symbol defs (for debugging only)\n" +- " -T file Dump expanded types into file (for debugging only)\n" ++ " -r file Read reference symbols from a file\n" ++ " -T file Dump expanded types into file\n" ++ " -p Preserve reference modversions or fail\n" + " -w Enable warnings\n" + " -q Disable warnings (default)\n" + " -h Print this message\n" +@@ -465,7 +653,7 @@ static void genksyms_usage(void) + + int main(int argc, char **argv) + { +- FILE *dumpfile = NULL; ++ FILE *dumpfile = NULL, *ref_file = NULL; + int o; + + #ifdef __GNU_LIBRARY__ +@@ -475,16 +663,18 @@ int main(int argc, char **argv) + {"warnings", 0, 0, 'w'}, + {"quiet", 0, 0, 'q'}, + {"dump", 0, 0, 'D'}, ++ {"reference", 1, 0, 'r'}, + {"dump-types", 1, 0, 'T'}, ++ {"preserve", 0, 0, 'p'}, + {"version", 0, 0, 
'V'}, + {"help", 0, 0, 'h'}, + {0, 0, 0, 0} + }; + +- while ((o = getopt_long(argc, argv, "a:dwqVDT:h", ++ while ((o = getopt_long(argc, argv, "a:dwqVDr:T:ph", + &long_opts[0], NULL)) != EOF) + #else /* __GNU_LIBRARY__ */ +- while ((o = getopt(argc, argv, "a:dwqVDT:h")) != EOF) ++ while ((o = getopt(argc, argv, "a:dwqVDr:T:ph")) != EOF) + #endif /* __GNU_LIBRARY__ */ + switch (o) { + case 'a': +@@ -505,6 +695,14 @@ int main(int argc, char **argv) + case 'D': + flag_dump_defs = 1; + break; ++ case 'r': ++ flag_reference = 1; ++ ref_file = fopen(optarg, "r"); ++ if (!ref_file) { ++ perror(optarg); ++ return 1; ++ } ++ break; + case 'T': + flag_dump_types = 1; + dumpfile = fopen(optarg, "w"); +@@ -513,6 +711,9 @@ int main(int argc, char **argv) + return 1; + } + break; ++ case 'p': ++ flag_preserve = 1; ++ break; + case 'h': + genksyms_usage(); + return 0; +@@ -533,6 +734,9 @@ int main(int argc, char **argv) + /* setlinebuf(debugfile); */ + } + ++ if (flag_reference) ++ read_reference(ref_file); ++ + yyparse(); + + if (flag_dump_types && visited_symbols) { +--- a/scripts/genksyms/genksyms.h ++++ b/scripts/genksyms/genksyms.h +@@ -29,6 +29,10 @@ enum symbol_type { + SYM_NORMAL, SYM_TYPEDEF, SYM_ENUM, SYM_STRUCT, SYM_UNION + }; + ++enum symbol_status { ++ STATUS_UNCHANGED, STATUS_DEFINED, STATUS_MODIFIED ++}; ++ + struct string_list { + struct string_list *next; + enum symbol_type tag; +@@ -43,6 +47,8 @@ struct symbol { + struct symbol *expansion_trail; + struct symbol *visited; + int is_extern; ++ int is_declared; ++ enum symbol_status status; + }; + + typedef struct string_list **yystype; diff --git a/src/patches/suse-2.6.27.25/patches.suse/kdb-resolve-uv-conflict.diff b/src/patches/suse-2.6.27.25/patches.suse/kdb-resolve-uv-conflict.diff new file mode 100644 index 0000000000..937cf24822 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/kdb-resolve-uv-conflict.diff @@ -0,0 +1,224 @@ +From: Jay Lan +Subject: [PATCH] Resolve KDB conflicts with UV +References: 
bnc#440376 + +Hi Keith, + +On Wed, Oct 29, 2008 at 03:57:25PM +1100, Keith Owens wrote: +> However there is a separate problem with your patch. You now wait in +> smp_kdb_stop() until all cpus are in KDB. If any cpu is completely +> hung so it cannot be interrupted then smp_kdb_stop() will never return +> and KDB will now appear to hang. +> +> The existing code avoids this by +> +> kdb() -> smp_kdb_stop() - issue KDB_VECTOR as normal interrupt but do not wait for cpus +> kdb() -> kdba_main_loop() +> kdba_main_loop() -> kdb_save_running() +> kdb_save_running() -> kdb_main_loop() +> kdb_main_loop() -> kdb_wait_for_cpus() +> +> kdb_wait_for_cpus() waits until the other cpus are in KDB. If a cpu +> does not respond to KDB_VECTOR after a few seconds then +> kdb_wait_for_cpus() hits the missing cpus with NMI. +> +> This two step approach (send KDB_VECTOR as normal interrupt, wait then +> send NMI) is used because NMI can be serviced at any time, even when +> the target cpu is in the middle of servicing an interrupt. This can +> result in incomplete register state which leads to broken backtraces. +> IOW, sending NMI first would actually make debugging harder. +> +> Given the above logic, if you are going to take over an existing +> interrupt vector then the vector needs to be acquired near the start of +> kdb() and released near the end of kdb(), and only on the master cpu. +> +> Note: there is no overwhelming need for KDB_VECTOR to have a high +> priority. As long as it is received within a few seconds then all is +> well. + +Thanks for the explanation. I see your point. + +How about if we keep the two step approach, but take over the vector +when we need it, in step one. Then give it back when the step two + wait is over. 
+(assuming we don't take over a vector needed for the NMI) + +Like this: + +Signed-off-by: Jay Lan +Acked-by: Bernhard Walle + +--- + arch/ia64/include/asm/kdb.h | 4 ++++ + arch/x86/kdb/kdbasupport_32.c | 22 ++++++++++++++++++---- + arch/x86/kdb/kdbasupport_64.c | 23 +++++++++++++++++++---- + include/asm-x86/irq_vectors.h | 11 ++++++----- + include/asm-x86/kdb.h | 4 ++++ + kdb/kdbmain.c | 2 ++ + 6 files changed, 53 insertions(+), 13 deletions(-) + +--- a/arch/ia64/include/asm/kdb.h ++++ b/arch/ia64/include/asm/kdb.h +@@ -42,4 +42,8 @@ kdba_funcptr_value(void *fp) + return *(unsigned long *)fp; + } + ++#ifdef CONFIG_SMP ++#define kdba_giveback_vector(vector) (0) ++#endif ++ + #endif /* !_ASM_KDB_H */ +--- a/arch/x86/kdb/kdbasupport_32.c ++++ b/arch/x86/kdb/kdbasupport_32.c +@@ -883,9 +883,6 @@ kdba_cpu_up(void) + static int __init + kdba_arch_init(void) + { +-#ifdef CONFIG_SMP +- set_intr_gate(KDB_VECTOR, kdb_interrupt); +-#endif + set_intr_gate(KDBENTER_VECTOR, kdb_call); + return 0; + } +@@ -1027,14 +1024,31 @@ kdba_verify_rw(unsigned long addr, size_ + + #include + ++gate_desc save_idt[NR_VECTORS]; ++ ++void kdba_takeover_vector(int vector) ++{ ++ memcpy(&save_idt[vector], &idt_table[vector], sizeof(gate_desc)); ++ set_intr_gate(KDB_VECTOR, kdb_interrupt); ++ return; ++} ++ ++void kdba_giveback_vector(int vector) ++{ ++ native_write_idt_entry(idt_table, vector, &save_idt[vector]); ++ return; ++} ++ + /* When first entering KDB, try a normal IPI. That reduces backtrace problems + * on the other cpus. 
+ */ + void + smp_kdb_stop(void) + { +- if (!KDB_FLAG(NOIPI)) ++ if (!KDB_FLAG(NOIPI)) { ++ kdba_takeover_vector(KDB_VECTOR); + send_IPI_allbutself(KDB_VECTOR); ++ } + } + + /* The normal KDB IPI handler */ +--- a/arch/x86/kdb/kdbasupport_64.c ++++ b/arch/x86/kdb/kdbasupport_64.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -900,9 +901,6 @@ kdba_cpu_up(void) + static int __init + kdba_arch_init(void) + { +-#ifdef CONFIG_SMP +- set_intr_gate(KDB_VECTOR, kdb_interrupt); +-#endif + set_intr_gate(KDBENTER_VECTOR, kdb_call); + return 0; + } +@@ -976,14 +974,31 @@ kdba_set_current_task(const struct task_ + + #include + ++gate_desc save_idt[NR_VECTORS]; ++ ++void kdba_takeover_vector(int vector) ++{ ++ memcpy(&save_idt[vector], &idt_table[vector], sizeof(gate_desc)); ++ set_intr_gate(KDB_VECTOR, kdb_interrupt); ++ return; ++} ++ ++void kdba_giveback_vector(int vector) ++{ ++ native_write_idt_entry(idt_table, vector, &save_idt[vector]); ++ return; ++} ++ + /* When first entering KDB, try a normal IPI. That reduces backtrace problems + * on the other cpus. + */ + void + smp_kdb_stop(void) + { +- if (!KDB_FLAG(NOIPI)) ++ if (!KDB_FLAG(NOIPI)) { ++ kdba_takeover_vector(KDB_VECTOR); + send_IPI_allbutself(KDB_VECTOR); ++ } + } + + /* The normal KDB IPI handler */ +--- a/include/asm-x86/irq_vectors.h ++++ b/include/asm-x86/irq_vectors.h +@@ -66,7 +66,6 @@ + # define RESCHEDULE_VECTOR 0xfc + # define CALL_FUNCTION_VECTOR 0xfb + # define CALL_FUNCTION_SINGLE_VECTOR 0xfa +-#define KDB_VECTOR 0xf9 + # define THERMAL_APIC_VECTOR 0xf0 + + #else +@@ -79,10 +78,6 @@ + #define THERMAL_APIC_VECTOR 0xfa + #define THRESHOLD_APIC_VECTOR 0xf9 + #define UV_BAU_MESSAGE 0xf8 +-/* Overload KDB_VECTOR with UV_BAU_MESSAGE. By the time the UV hardware is +- * ready, we should have moved to a dynamically allocated vector scheme. 
+- */ +-#define KDB_VECTOR 0xf8 + #define INVALIDATE_TLB_VECTOR_END 0xf7 + #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ + +@@ -91,6 +86,12 @@ + #endif + + /* ++ * KDB_VECTOR will take over vector 0xfe when it is needed, as in theory ++ * it should not be used anyway. ++ */ ++#define KDB_VECTOR 0xfe ++ ++/* + * Local APIC timer IRQ vector is on a different priority level, + * to work around the 'lost local interrupt if more than 2 IRQ + * sources per level' errata. +--- a/include/asm-x86/kdb.h ++++ b/include/asm-x86/kdb.h +@@ -131,4 +131,8 @@ kdba_funcptr_value(void *fp) + return (unsigned long)fp; + } + ++#ifdef CONFIG_SMP ++extern void kdba_giveback_vector(int); ++#endif ++ + #endif /* !_ASM_KDB_H */ +--- a/kdb/kdbmain.c ++++ b/kdb/kdbmain.c +@@ -1666,6 +1666,8 @@ kdb_wait_for_cpus(void) + wait == 1 ? " is" : "s are", + wait == 1 ? "its" : "their"); + } ++ /* give back the vector we took over in smp_kdb_stop */ ++ kdba_giveback_vector(KDB_VECTOR); + #endif /* CONFIG_SMP */ + } + diff --git a/src/patches/suse-2.6.27.25/patches.suse/nfs4acl-ai.diff b/src/patches/suse-2.6.27.25/patches.suse/nfs4acl-ai.diff new file mode 100644 index 0000000000..6d4d82c76b --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/nfs4acl-ai.diff @@ -0,0 +1,123 @@ +From: Andreas Gruenbacher +Subject: Implement those parts of Automatic Inheritance (AI) which are safe under POSIX + +If AI is disabled for a directory (ACL4_AUTO_INHERIT +not set), nothing changes. If AI is enabled for a directory, the +create-time inheritance algorithm changes as follows: + +* All inherited ACEs will have the ACE4_INHERITED_ACE flag set. + +* The create mode is applied to the ACL (by setting the file masks), +which means that the ACL must no longer be subject to AI permission +propagation, and so the ACL4_PROTECTED is set. 
+ +By itelf, this is relatively useless because it will not allow +permissions to propagate, but AI aware applications can clear the +ACL4_PROTECTED flag when they know what they are doing, and this will +enable AI permission propagation. + +It would be nice if AI aware applications could indicate this fact to +the kernel so that the kernel can avoid setting the ACL4_PROTECTED flag +in the first place, but there is no such user-space interface at this +point. + +Signed-off-by: Andreas Gruenbacher + +--- + fs/nfs4acl_base.c | 12 ++++++++++-- + include/linux/nfs4acl.h | 26 +++++++++++++++++++++++--- + 2 files changed, 33 insertions(+), 5 deletions(-) + +--- a/fs/nfs4acl_base.c ++++ b/fs/nfs4acl_base.c +@@ -151,7 +151,8 @@ nfs4acl_chmod(struct nfs4acl *acl, mode_ + + if (acl->a_owner_mask == owner_mask && + acl->a_group_mask == group_mask && +- acl->a_other_mask == other_mask) ++ acl->a_other_mask == other_mask && ++ (!nfs4acl_is_auto_inherit(acl) || nfs4acl_is_protected(acl))) + return acl; + + clone = nfs4acl_clone(acl); +@@ -162,6 +163,8 @@ nfs4acl_chmod(struct nfs4acl *acl, mode_ + clone->a_owner_mask = owner_mask; + clone->a_group_mask = group_mask; + clone->a_other_mask = other_mask; ++ if (nfs4acl_is_auto_inherit(clone)) ++ clone->a_flags |= ACL4_PROTECTED; + + if (nfs4acl_write_through(&clone)) { + nfs4acl_put(clone); +@@ -558,7 +561,12 @@ nfs4acl_inherit(const struct nfs4acl *di + return ERR_PTR(-ENOMEM); + } + +- acl->a_flags = (dir_acl->a_flags & ACL4_WRITE_THROUGH); ++ acl->a_flags = (dir_acl->a_flags & ~ACL4_PROTECTED); ++ if (nfs4acl_is_auto_inherit(acl)) { ++ nfs4acl_for_each_entry(ace, acl) ++ ace->e_flags |= ACE4_INHERITED_ACE; ++ acl->a_flags |= ACL4_PROTECTED; ++ } + + return acl; + } +--- a/include/linux/nfs4acl.h ++++ b/include/linux/nfs4acl.h +@@ -32,10 +32,16 @@ struct nfs4acl { + _ace--) + + /* a_flags values */ ++#define ACL4_AUTO_INHERIT 0x01 ++#define ACL4_PROTECTED 0x02 ++#define ACL4_DEFAULTED 0x04 + #define ACL4_WRITE_THROUGH 0x40 + 
+-#define ACL4_VALID_FLAGS \ +- ACL4_WRITE_THROUGH ++#define ACL4_VALID_FLAGS ( \ ++ ACL4_AUTO_INHERIT | \ ++ ACL4_PROTECTED | \ ++ ACL4_DEFAULTED | \ ++ ACL4_WRITE_THROUGH ) + + /* e_type values */ + #define ACE4_ACCESS_ALLOWED_ACE_TYPE 0x0000 +@@ -51,6 +57,7 @@ struct nfs4acl { + /*#define ACE4_SUCCESSFUL_ACCESS_ACE_FLAG 0x0010*/ + /*#define ACE4_FAILED_ACCESS_ACE_FLAG 0x0020*/ + #define ACE4_IDENTIFIER_GROUP 0x0040 ++#define ACE4_INHERITED_ACE 0x0080 + #define ACE4_SPECIAL_WHO 0x4000 /* in-memory representation only */ + + #define ACE4_VALID_FLAGS ( \ +@@ -58,7 +65,8 @@ struct nfs4acl { + ACE4_DIRECTORY_INHERIT_ACE | \ + ACE4_NO_PROPAGATE_INHERIT_ACE | \ + ACE4_INHERIT_ONLY_ACE | \ +- ACE4_IDENTIFIER_GROUP ) ++ ACE4_IDENTIFIER_GROUP | \ ++ ACE4_INHERITED_ACE ) + + /* e_mask bitflags */ + #define ACE4_READ_DATA 0x00000001 +@@ -128,6 +136,18 @@ extern const char nfs4ace_group_who[]; + extern const char nfs4ace_everyone_who[]; + + static inline int ++nfs4acl_is_auto_inherit(const struct nfs4acl *acl) ++{ ++ return acl->a_flags & ACL4_AUTO_INHERIT; ++} ++ ++static inline int ++nfs4acl_is_protected(const struct nfs4acl *acl) ++{ ++ return acl->a_flags & ACL4_PROTECTED; ++} ++ ++static inline int + nfs4ace_is_owner(const struct nfs4ace *ace) + { + return (ace->e_flags & ACE4_SPECIAL_WHO) && diff --git a/src/patches/suse-2.6.27.25/patches.suse/nfs4acl-common.diff b/src/patches/suse-2.6.27.25/patches.suse/nfs4acl-common.diff new file mode 100644 index 0000000000..bccf826115 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/nfs4acl-common.diff @@ -0,0 +1,1770 @@ +From: Andreas Gruenbacher +Subject: NFSv4 ACL in-memory representation and manipulation + +* In-memory representation (struct nfs4acl). +* Functionality a filesystem needs such as permission checking, + apply mode to acl, compute mode from acl, inheritance upon file + create. +* Compute a mask-less acl from struct nfs4acl that grants the same + permissions. 
Protocols which don't understand the masks need + this. +* Convert to/from xattrs. + +Signed-off-by: Andreas Gruenbacher + +--- + fs/Kconfig | 4 + fs/Makefile | 4 + fs/nfs4acl_base.c | 565 +++++++++++++++++++++++++++++++ + fs/nfs4acl_compat.c | 757 ++++++++++++++++++++++++++++++++++++++++++ + fs/nfs4acl_xattr.c | 146 ++++++++ + include/linux/nfs4acl.h | 205 +++++++++++ + include/linux/nfs4acl_xattr.h | 32 + + 7 files changed, 1713 insertions(+) + +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -419,6 +419,10 @@ config FS_POSIX_ACL + bool + default n + ++config FS_NFS4ACL ++ bool ++ default n ++ + source "fs/xfs/Kconfig" + source "fs/gfs2/Kconfig" + +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -50,6 +50,10 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl. + obj-$(CONFIG_NFS_COMMON) += nfs_common/ + obj-$(CONFIG_GENERIC_ACL) += generic_acl.o + ++obj-$(CONFIG_FS_NFS4ACL) += nfs4acl.o ++nfs4acl-y := nfs4acl_base.o nfs4acl_xattr.o \ ++ nfs4acl_compat.o ++ + obj-$(CONFIG_QUOTA) += dquot.o + obj-$(CONFIG_QFMT_V1) += quota_v1.o + obj-$(CONFIG_QFMT_V2) += quota_v2.o +--- /dev/null ++++ b/fs/nfs4acl_base.c +@@ -0,0 +1,565 @@ ++/* ++ * Copyright (C) 2006 Andreas Gruenbacher ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2, or (at your option) any ++ * later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++ ++/* ++ * ACL entries that have ACE4_SPECIAL_WHO set in ace->e_flags use the ++ * pointer values of these constants in ace->u.e_who to avoid massive ++ * amounts of string comparisons. 
++ */ ++ ++const char nfs4ace_owner_who[] = "OWNER@"; ++const char nfs4ace_group_who[] = "GROUP@"; ++const char nfs4ace_everyone_who[] = "EVERYONE@"; ++ ++EXPORT_SYMBOL(nfs4ace_owner_who); ++EXPORT_SYMBOL(nfs4ace_group_who); ++EXPORT_SYMBOL(nfs4ace_everyone_who); ++ ++/** ++ * nfs4acl_alloc - allocate an acl ++ * @count: number of entries ++ */ ++struct nfs4acl * ++nfs4acl_alloc(int count) ++{ ++ size_t size = sizeof(struct nfs4acl) + count * sizeof(struct nfs4ace); ++ struct nfs4acl *acl = kmalloc(size, GFP_KERNEL); ++ ++ if (acl) { ++ memset(acl, 0, size); ++ atomic_set(&acl->a_refcount, 1); ++ acl->a_count = count; ++ } ++ return acl; ++} ++EXPORT_SYMBOL(nfs4acl_alloc); ++ ++/** ++ * nfs4acl_clone - create a copy of an acl ++ */ ++struct nfs4acl * ++nfs4acl_clone(const struct nfs4acl *acl) ++{ ++ int count = acl->a_count; ++ size_t size = sizeof(struct nfs4acl) + count * sizeof(struct nfs4ace); ++ struct nfs4acl *dup = kmalloc(size, GFP_KERNEL); ++ ++ if (dup) { ++ memcpy(dup, acl, size); ++ atomic_set(&dup->a_refcount, 1); ++ } ++ return dup; ++} ++ ++/* ++ * The POSIX permissions are supersets of the below mask flags. ++ * ++ * The ACE4_READ_ATTRIBUTES and ACE4_READ_ACL flags are always granted ++ * in POSIX. The ACE4_SYNCHRONIZE flag has no meaning under POSIX. We ++ * make sure that we do not mask them if they are set, so that users who ++ * rely on these flags won't get confused. 
++ */ ++#define ACE4_POSIX_MODE_READ ( \ ++ ACE4_READ_DATA | ACE4_LIST_DIRECTORY ) ++#define ACE4_POSIX_MODE_WRITE ( \ ++ ACE4_WRITE_DATA | ACE4_ADD_FILE | \ ++ ACE4_APPEND_DATA | ACE4_ADD_SUBDIRECTORY | \ ++ ACE4_DELETE_CHILD ) ++#define ACE4_POSIX_MODE_EXEC ( \ ++ ACE4_EXECUTE) ++ ++static int ++nfs4acl_mask_to_mode(unsigned int mask) ++{ ++ int mode = 0; ++ ++ if (mask & ACE4_POSIX_MODE_READ) ++ mode |= MAY_READ; ++ if (mask & ACE4_POSIX_MODE_WRITE) ++ mode |= MAY_WRITE; ++ if (mask & ACE4_POSIX_MODE_EXEC) ++ mode |= MAY_EXEC; ++ ++ return mode; ++} ++ ++/** ++ * nfs4acl_masks_to_mode - compute file mode permission bits from file masks ++ * ++ * Compute the file mode permission bits from the file masks in the acl. ++ */ ++int ++nfs4acl_masks_to_mode(const struct nfs4acl *acl) ++{ ++ return nfs4acl_mask_to_mode(acl->a_owner_mask) << 6 | ++ nfs4acl_mask_to_mode(acl->a_group_mask) << 3 | ++ nfs4acl_mask_to_mode(acl->a_other_mask); ++} ++EXPORT_SYMBOL(nfs4acl_masks_to_mode); ++ ++static unsigned int ++nfs4acl_mode_to_mask(mode_t mode) ++{ ++ unsigned int mask = ACE4_POSIX_ALWAYS_ALLOWED; ++ ++ if (mode & MAY_READ) ++ mask |= ACE4_POSIX_MODE_READ; ++ if (mode & MAY_WRITE) ++ mask |= ACE4_POSIX_MODE_WRITE; ++ if (mode & MAY_EXEC) ++ mask |= ACE4_POSIX_MODE_EXEC; ++ ++ return mask; ++} ++ ++/** ++ * nfs4acl_chmod - update the file masks to reflect the new mode ++ * @mode: file mode permission bits to apply to the @acl ++ * ++ * Converts the mask flags corresponding to the owner, group, and other file ++ * permissions and computes the file masks. Returns @acl if it already has the ++ * appropriate file masks, or updates the flags in a copy of @acl. Takes over ++ * @acl. 
++ */ ++struct nfs4acl * ++nfs4acl_chmod(struct nfs4acl *acl, mode_t mode) ++{ ++ unsigned int owner_mask, group_mask, other_mask; ++ struct nfs4acl *clone; ++ ++ owner_mask = nfs4acl_mode_to_mask(mode >> 6); ++ group_mask = nfs4acl_mode_to_mask(mode >> 3); ++ other_mask = nfs4acl_mode_to_mask(mode); ++ ++ if (acl->a_owner_mask == owner_mask && ++ acl->a_group_mask == group_mask && ++ acl->a_other_mask == other_mask) ++ return acl; ++ ++ clone = nfs4acl_clone(acl); ++ nfs4acl_put(acl); ++ if (!clone) ++ return ERR_PTR(-ENOMEM); ++ ++ clone->a_owner_mask = owner_mask; ++ clone->a_group_mask = group_mask; ++ clone->a_other_mask = other_mask; ++ ++ if (nfs4acl_write_through(&clone)) { ++ nfs4acl_put(clone); ++ clone = ERR_PTR(-ENOMEM); ++ } ++ return clone; ++} ++EXPORT_SYMBOL(nfs4acl_chmod); ++ ++/** ++ * nfs4acl_want_to_mask - convert permission want argument to a mask ++ * @want: @want argument of the permission inode operation ++ * ++ * When checking for append, @want is (MAY_WRITE | MAY_APPEND). ++ */ ++unsigned int ++nfs4acl_want_to_mask(int want) ++{ ++ unsigned int mask = 0; ++ ++ if (want & MAY_READ) ++ mask |= ACE4_READ_DATA; ++ if (want & MAY_APPEND) ++ mask |= ACE4_APPEND_DATA; ++ else if (want & MAY_WRITE) ++ mask |= ACE4_WRITE_DATA; ++ if (want & MAY_EXEC) ++ mask |= ACE4_EXECUTE; ++ ++ return mask; ++} ++EXPORT_SYMBOL(nfs4acl_want_to_mask); ++ ++/** ++ * nfs4acl_capability_check - check for capabilities overriding read/write access ++ * @inode: inode to check ++ * @mask: requested access (ACE4_* bitmask) ++ * ++ * Capabilities other than CAP_DAC_OVERRIDE and CAP_DAC_READ_SEARCH must be checked ++ * separately. ++ */ ++static inline int nfs4acl_capability_check(struct inode *inode, unsigned int mask) ++{ ++ /* ++ * Read/write DACs are always overridable. ++ * Executable DACs are overridable if at least one exec bit is set. 
++ */ ++ if (!(mask & (ACE4_WRITE_ACL | ACE4_WRITE_OWNER)) && ++ (!(mask & ACE4_EXECUTE) || ++ (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))) ++ if (capable(CAP_DAC_OVERRIDE)) ++ return 0; ++ ++ /* ++ * Searching includes executable on directories, else just read. ++ */ ++ if (!(mask & ~(ACE4_READ_DATA | ACE4_EXECUTE)) && ++ (S_ISDIR(inode->i_mode) || !(mask & ACE4_EXECUTE))) ++ if (capable(CAP_DAC_READ_SEARCH)) ++ return 0; ++ ++ return -EACCES; ++} ++ ++/** ++ * nfs4acl_permission - permission check algorithm with masking ++ * @inode: inode to check ++ * @acl: nfs4 acl of the inode ++ * @mask: requested access (ACE4_* bitmask) ++ * ++ * Checks if the current process is granted @mask flags in @acl. With ++ * write-through, the OWNER@ is always granted the owner file mask, the ++ * GROUP@ is always granted the group file mask, and EVERYONE@ is always ++ * granted the other file mask. Otherwise, processes are only granted ++ * @mask flags which they are granted in the @acl as well as in their ++ * file mask. ++ */ ++int nfs4acl_permission(struct inode *inode, const struct nfs4acl *acl, ++ unsigned int mask) ++{ ++ const struct nfs4ace *ace; ++ unsigned int file_mask, requested = mask, denied = 0; ++ int in_owning_group = in_group_p(inode->i_gid); ++ int owner_or_group_class = in_owning_group; ++ ++ /* ++ * A process is in the ++ * - owner file class if it owns the file, in the ++ * - group file class if it is in the file's owning group or ++ * it matches any of the user or group entries, and in the ++ * - other file class otherwise. 
++ */ ++ ++ nfs4acl_for_each_entry(ace, acl) { ++ unsigned int ace_mask = ace->e_mask; ++ ++ if (nfs4ace_is_inherit_only(ace)) ++ continue; ++ if (nfs4ace_is_owner(ace)) { ++ if (current->fsuid != inode->i_uid) ++ continue; ++ goto is_owner; ++ } else if (nfs4ace_is_group(ace)) { ++ if (!in_owning_group) ++ continue; ++ } else if (nfs4ace_is_unix_id(ace)) { ++ if (ace->e_flags & ACE4_IDENTIFIER_GROUP) { ++ if (!in_group_p(ace->u.e_id)) ++ continue; ++ } else { ++ if (current->fsuid != ace->u.e_id) ++ continue; ++ } ++ } else ++ goto is_everyone; ++ ++ /* ++ * Apply the group file mask to entries other than OWNER@ and ++ * EVERYONE@. This is not required for correct access checking ++ * but ensures that we grant the same permissions as the acl ++ * computed by nfs4acl_apply_masks(). ++ * ++ * For example, without this restriction, 'group@:rw::allow' ++ * with mode 0600 would grant rw access to owner processes ++ * which are also in the owning group. This cannot be expressed ++ * in an acl. ++ */ ++ if (nfs4ace_is_allow(ace)) ++ ace_mask &= acl->a_group_mask; ++ ++ is_owner: ++ /* The process is in the owner or group file class. */ ++ owner_or_group_class = 1; ++ ++ is_everyone: ++ /* Check which mask flags the ACE allows or denies. */ ++ if (nfs4ace_is_deny(ace)) ++ denied |= ace_mask & mask; ++ mask &= ~ace_mask; ++ ++ /* Keep going until we know which file class the process is in. */ ++ if (!mask && owner_or_group_class) ++ break; ++ } ++ denied |= mask; ++ ++ /* ++ * Figure out which file mask applies. ++ * Clear write-through if the process is in the file group class but ++ * not in the owning group, and so the denied permissions apply. 
++ */ ++ if (current->fsuid == inode->i_uid) ++ file_mask = acl->a_owner_mask; ++ else if (in_owning_group || owner_or_group_class) ++ file_mask = acl->a_group_mask; ++ else ++ file_mask = acl->a_other_mask; ++ ++ denied |= requested & ~file_mask; ++ if (!denied) ++ return 0; ++ return nfs4acl_capability_check(inode, requested); ++} ++EXPORT_SYMBOL(nfs4acl_permission); ++ ++/** ++ * nfs4acl_generic_permission - permission check algorithm without explicit acl ++ * @inode: inode to check permissions for ++ * @mask: requested access (ACE4_* bitmask) ++ * ++ * The file mode of a file without ACL corresponds to an ACL with a single ++ * "EVERYONE:~0::ALLOW" entry, with file masks that correspond to the file mode ++ * permissions. Instead of constructing a temporary ACL and applying ++ * nfs4acl_permission() to it, compute the identical result directly from the file ++ * mode. ++ */ ++int nfs4acl_generic_permission(struct inode *inode, unsigned int mask) ++{ ++ int mode = inode->i_mode; ++ ++ if (current->fsuid == inode->i_uid) ++ mode >>= 6; ++ else if (in_group_p(inode->i_gid)) ++ mode >>= 3; ++ if (!(mask & ~nfs4acl_mode_to_mask(mode))) ++ return 0; ++ return nfs4acl_capability_check(inode, mask); ++} ++EXPORT_SYMBOL(nfs4acl_generic_permission); ++ ++/* ++ * nfs4ace_is_same_who - do both acl entries refer to the same identifier? 
++ */ ++int ++nfs4ace_is_same_who(const struct nfs4ace *a, const struct nfs4ace *b) ++{ ++#define WHO_FLAGS (ACE4_SPECIAL_WHO | ACE4_IDENTIFIER_GROUP) ++ if ((a->e_flags & WHO_FLAGS) != (b->e_flags & WHO_FLAGS)) ++ return 0; ++ if (a->e_flags & ACE4_SPECIAL_WHO) ++ return a->u.e_who == b->u.e_who; ++ else ++ return a->u.e_id == b->u.e_id; ++#undef WHO_FLAGS ++} ++ ++/** ++ * nfs4acl_set_who - set a special who value ++ * @ace: acl entry ++ * @who: who value to use ++ */ ++int ++nfs4ace_set_who(struct nfs4ace *ace, const char *who) ++{ ++ if (!strcmp(who, nfs4ace_owner_who)) ++ who = nfs4ace_owner_who; ++ else if (!strcmp(who, nfs4ace_group_who)) ++ who = nfs4ace_group_who; ++ else if (!strcmp(who, nfs4ace_everyone_who)) ++ who = nfs4ace_everyone_who; ++ else ++ return -EINVAL; ++ ++ ace->u.e_who = who; ++ ace->e_flags |= ACE4_SPECIAL_WHO; ++ ace->e_flags &= ~ACE4_IDENTIFIER_GROUP; ++ return 0; ++} ++EXPORT_SYMBOL(nfs4ace_set_who); ++ ++/** ++ * nfs4acl_allowed_to_who - mask flags allowed to a specific who value ++ * ++ * Computes the mask values allowed to a specific who value, taking ++ * EVERYONE@ entries into account. ++ */ ++static unsigned int ++nfs4acl_allowed_to_who(struct nfs4acl *acl, struct nfs4ace *who) ++{ ++ struct nfs4ace *ace; ++ unsigned int allowed = 0; ++ ++ nfs4acl_for_each_entry_reverse(ace, acl) { ++ if (nfs4ace_is_inherit_only(ace)) ++ continue; ++ if (nfs4ace_is_same_who(ace, who) || ++ nfs4ace_is_everyone(ace)) { ++ if (nfs4ace_is_allow(ace)) ++ allowed |= ace->e_mask; ++ else if (nfs4ace_is_deny(ace)) ++ allowed &= ~ace->e_mask; ++ } ++ } ++ return allowed; ++} ++ ++/** ++ * nfs4acl_compute_max_masks - compute upper bound masks ++ * ++ * Computes upper bound owner, group, and other masks so that none of ++ * the mask flags allowed by the acl are disabled (for any choice of the ++ * file owner or group membership). 
++ */ ++static void ++nfs4acl_compute_max_masks(struct nfs4acl *acl) ++{ ++ struct nfs4ace *ace; ++ ++ acl->a_owner_mask = 0; ++ acl->a_group_mask = 0; ++ acl->a_other_mask = 0; ++ ++ nfs4acl_for_each_entry_reverse(ace, acl) { ++ if (nfs4ace_is_inherit_only(ace)) ++ continue; ++ ++ if (nfs4ace_is_owner(ace)) { ++ if (nfs4ace_is_allow(ace)) ++ acl->a_owner_mask |= ace->e_mask; ++ else if (nfs4ace_is_deny(ace)) ++ acl->a_owner_mask &= ~ace->e_mask; ++ } else if (nfs4ace_is_everyone(ace)) { ++ if (nfs4ace_is_allow(ace)) { ++ struct nfs4ace who = { ++ .e_flags = ACE4_SPECIAL_WHO, ++ .u.e_who = nfs4ace_group_who, ++ }; ++ ++ acl->a_other_mask |= ace->e_mask; ++ acl->a_group_mask |= ++ nfs4acl_allowed_to_who(acl, &who); ++ acl->a_owner_mask |= ace->e_mask; ++ } else if (nfs4ace_is_deny(ace)) { ++ acl->a_other_mask &= ~ace->e_mask; ++ acl->a_group_mask &= ~ace->e_mask; ++ acl->a_owner_mask &= ~ace->e_mask; ++ } ++ } else { ++ if (nfs4ace_is_allow(ace)) { ++ unsigned int mask = ++ nfs4acl_allowed_to_who(acl, ace); ++ ++ acl->a_group_mask |= mask; ++ acl->a_owner_mask |= mask; ++ } ++ } ++ } ++} ++ ++/** ++ * nfs4acl_inherit - compute the acl a new file will inherit ++ * @dir_acl: acl of the containing direcory ++ * @mode: file type and create mode of the new file ++ * ++ * Given the containing directory's acl, this function will compute the ++ * acl that new files in that directory will inherit, or %NULL if ++ * @dir_acl does not contain acl entries inheritable by this file. ++ * ++ * Without write-through, the file masks in the returned acl are set to ++ * the intersection of the create mode and the maximum permissions ++ * allowed to each file class. With write-through, the file masks are ++ * set to the create mode. 
++ */ ++struct nfs4acl * ++nfs4acl_inherit(const struct nfs4acl *dir_acl, mode_t mode) ++{ ++ const struct nfs4ace *dir_ace; ++ struct nfs4acl *acl; ++ struct nfs4ace *ace; ++ int count = 0; ++ ++ if (S_ISDIR(mode)) { ++ nfs4acl_for_each_entry(dir_ace, dir_acl) { ++ if (!nfs4ace_is_inheritable(dir_ace)) ++ continue; ++ count++; ++ } ++ if (!count) ++ return NULL; ++ acl = nfs4acl_alloc(count); ++ if (!acl) ++ return ERR_PTR(-ENOMEM); ++ ace = acl->a_entries; ++ nfs4acl_for_each_entry(dir_ace, dir_acl) { ++ if (!nfs4ace_is_inheritable(dir_ace)) ++ continue; ++ memcpy(ace, dir_ace, sizeof(struct nfs4ace)); ++ if (dir_ace->e_flags & ACE4_NO_PROPAGATE_INHERIT_ACE) ++ nfs4ace_clear_inheritance_flags(ace); ++ if ((dir_ace->e_flags & ACE4_FILE_INHERIT_ACE) && ++ !(dir_ace->e_flags & ACE4_DIRECTORY_INHERIT_ACE)) ++ ace->e_flags |= ACE4_INHERIT_ONLY_ACE; ++ ace++; ++ } ++ } else { ++ nfs4acl_for_each_entry(dir_ace, dir_acl) { ++ if (!(dir_ace->e_flags & ACE4_FILE_INHERIT_ACE)) ++ continue; ++ count++; ++ } ++ if (!count) ++ return NULL; ++ acl = nfs4acl_alloc(count); ++ if (!acl) ++ return ERR_PTR(-ENOMEM); ++ ace = acl->a_entries; ++ nfs4acl_for_each_entry(dir_ace, dir_acl) { ++ if (!(dir_ace->e_flags & ACE4_FILE_INHERIT_ACE)) ++ continue; ++ memcpy(ace, dir_ace, sizeof(struct nfs4ace)); ++ nfs4ace_clear_inheritance_flags(ace); ++ ace++; ++ } ++ } ++ ++ /* The maximum max flags that the owner, group, and other classes ++ are allowed. */ ++ if (dir_acl->a_flags & ACL4_WRITE_THROUGH) { ++ acl->a_owner_mask = ACE4_VALID_MASK; ++ acl->a_group_mask = ACE4_VALID_MASK; ++ acl->a_other_mask = ACE4_VALID_MASK; ++ ++ mode &= ~current->fs->umask; ++ } else ++ nfs4acl_compute_max_masks(acl); ++ ++ /* Apply the create mode. 
*/ ++ acl->a_owner_mask &= nfs4acl_mode_to_mask(mode >> 6); ++ acl->a_group_mask &= nfs4acl_mode_to_mask(mode >> 3); ++ acl->a_other_mask &= nfs4acl_mode_to_mask(mode); ++ ++ if (nfs4acl_write_through(&acl)) { ++ nfs4acl_put(acl); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ acl->a_flags = (dir_acl->a_flags & ACL4_WRITE_THROUGH); ++ ++ return acl; ++} ++EXPORT_SYMBOL(nfs4acl_inherit); +--- /dev/null ++++ b/fs/nfs4acl_compat.c +@@ -0,0 +1,757 @@ ++/* ++ * Copyright (C) 2006 Andreas Gruenbacher ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2, or (at your option) any ++ * later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ */ ++ ++#include ++#include ++#include ++ ++/** ++ * struct nfs4acl_alloc - remember how many entries are actually allocated ++ * @acl: acl with a_count <= @count ++ * @count: the actual number of entries allocated in @acl ++ * ++ * We pass around this structure while modifying an acl, so that we do ++ * not have to reallocate when we remove existing entries followed by ++ * adding new entries. ++ */ ++struct nfs4acl_alloc { ++ struct nfs4acl *acl; ++ unsigned int count; ++}; ++ ++/** ++ * nfs4acl_delete_entry - delete an entry in an acl ++ * @x: acl and number of allocated entries ++ * @ace: an entry in @x->acl ++ * ++ * Updates @ace so that it points to the entry before the deleted entry ++ * on return. (When deleting the first entry, @ace will point to the ++ * (non-existant) entry before the first entry). This behavior is the ++ * expected behavior when deleting entries while forward iterating over ++ * an acl. 
++ */ ++static void ++nfs4acl_delete_entry(struct nfs4acl_alloc *x, struct nfs4ace **ace) ++{ ++ void *end = x->acl->a_entries + x->acl->a_count; ++ ++ memmove(*ace, *ace + 1, end - (void *)(*ace + 1)); ++ (*ace)--; ++ x->acl->a_count--; ++} ++ ++/** ++ * nfs4acl_insert_entry - insert an entry in an acl ++ * @x: acl and number of allocated entries ++ * @ace: entry before which the new entry shall be inserted ++ * ++ * Insert a new entry in @x->acl at position @ace, and zero-initialize ++ * it. This may require reallocating @x->acl. ++ */ ++static int ++nfs4acl_insert_entry(struct nfs4acl_alloc *x, struct nfs4ace **ace) ++{ ++ if (x->count == x->acl->a_count) { ++ int n = *ace - x->acl->a_entries; ++ struct nfs4acl *acl2; ++ ++ acl2 = nfs4acl_alloc(x->acl->a_count + 1); ++ if (!acl2) ++ return -1; ++ acl2->a_flags = x->acl->a_flags; ++ acl2->a_owner_mask = x->acl->a_owner_mask; ++ acl2->a_group_mask = x->acl->a_group_mask; ++ acl2->a_other_mask = x->acl->a_other_mask; ++ memcpy(acl2->a_entries, x->acl->a_entries, ++ n * sizeof(struct nfs4ace)); ++ memcpy(acl2->a_entries + n + 1, *ace, ++ (x->acl->a_count - n) * sizeof(struct nfs4ace)); ++ kfree(x->acl); ++ x->acl = acl2; ++ x->count = acl2->a_count; ++ *ace = acl2->a_entries + n; ++ } else { ++ void *end = x->acl->a_entries + x->acl->a_count; ++ ++ memmove(*ace + 1, *ace, end - (void *)*ace); ++ x->acl->a_count++; ++ } ++ memset(*ace, 0, sizeof(struct nfs4ace)); ++ return 0; ++} ++ ++/** ++ * nfs4ace_change_mask - change the mask in @ace to @mask ++ * @x: acl and number of allocated entries ++ * @ace: entry to modify ++ * @mask: new mask for @ace ++ * ++ * Set the effective mask of @ace to @mask. This will require splitting ++ * off a separate acl entry if @ace is inheritable. In that case, the ++ * effective- only acl entry is inserted after the inheritable acl ++ * entry, end the inheritable acl entry is set to inheritable-only. 
If ++ * @mode is 0, either set the original acl entry to inheritable-only if ++ * it was inheritable, or remove it otherwise. The returned @ace points ++ * to the modified or inserted effective-only acl entry if that entry ++ * exists, to the entry that has become inheritable-only, or else to the ++ * previous entry in the acl. This is the expected behavior when ++ * modifying masks while forward iterating over an acl. ++ */ ++static int ++nfs4ace_change_mask(struct nfs4acl_alloc *x, struct nfs4ace **ace, ++ unsigned int mask) ++{ ++ if (mask && (*ace)->e_mask == mask) ++ return 0; ++ if (mask & ~ACE4_POSIX_ALWAYS_ALLOWED) { ++ if (nfs4ace_is_inheritable(*ace)) { ++ if (nfs4acl_insert_entry(x, ace)) ++ return -1; ++ memcpy(*ace, *ace + 1, sizeof(struct nfs4ace)); ++ (*ace)->e_flags |= ACE4_INHERIT_ONLY_ACE; ++ (*ace)++; ++ nfs4ace_clear_inheritance_flags(*ace); ++ } ++ (*ace)->e_mask = mask; ++ } else { ++ if (nfs4ace_is_inheritable(*ace)) ++ (*ace)->e_flags |= ACE4_INHERIT_ONLY_ACE; ++ else ++ nfs4acl_delete_entry(x, ace); ++ } ++ return 0; ++} ++ ++/** ++ * nfs4acl_move_everyone_aces_down - move everyone@ acl entries to the end ++ * @x: acl and number of allocated entries ++ * ++ * Move all everyone acl entries to the bottom of the acl so that only a ++ * single everyone@ allow acl entry remains at the end, and update the ++ * mask fields of all acl entries on the way. If everyone@ is not ++ * granted any permissions, no empty everyone@ acl entry is inserted. ++ * ++ * This transformation does not modify the permissions that the acl ++ * grants, but we need it to simplify successive transformations. 
++ */ ++static int ++nfs4acl_move_everyone_aces_down(struct nfs4acl_alloc *x) ++{ ++ struct nfs4ace *ace; ++ unsigned int allowed = 0, denied = 0; ++ ++ nfs4acl_for_each_entry(ace, x->acl) { ++ if (nfs4ace_is_inherit_only(ace)) ++ continue; ++ if (nfs4ace_is_everyone(ace)) { ++ if (nfs4ace_is_allow(ace)) ++ allowed |= (ace->e_mask & ~denied); ++ else if (nfs4ace_is_deny(ace)) ++ denied |= (ace->e_mask & ~allowed); ++ else ++ continue; ++ if (nfs4ace_change_mask(x, &ace, 0)) ++ return -1; ++ } else { ++ if (nfs4ace_is_allow(ace)) { ++ if (nfs4ace_change_mask(x, &ace, allowed | ++ (ace->e_mask & ~denied))) ++ return -1; ++ } else if (nfs4ace_is_deny(ace)) { ++ if (nfs4ace_change_mask(x, &ace, denied | ++ (ace->e_mask & ~allowed))) ++ return -1; ++ } ++ } ++ } ++ if (allowed & ~ACE4_POSIX_ALWAYS_ALLOWED) { ++ struct nfs4ace *last_ace = ace - 1; ++ ++ if (nfs4ace_is_everyone(last_ace) && ++ nfs4ace_is_allow(last_ace) && ++ nfs4ace_is_inherit_only(last_ace) && ++ last_ace->e_mask == allowed) ++ last_ace->e_flags &= ~ACE4_INHERIT_ONLY_ACE; ++ else { ++ if (nfs4acl_insert_entry(x, &ace)) ++ return -1; ++ ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE; ++ ace->e_flags = ACE4_SPECIAL_WHO; ++ ace->e_mask = allowed; ++ ace->u.e_who = nfs4ace_everyone_who; ++ } ++ } ++ return 0; ++} ++ ++/** ++ * __nfs4acl_propagate_everyone - propagate everyone@ mask flags up for @who ++ * @x: acl and number of allocated entries ++ * @who: identifier to propagate mask flags for ++ * @allow: mask flags to propagate up ++ * ++ * Propagate mask flags from the trailing everyone@ allow acl entry up ++ * for the specified @who. 
++ * ++ * The idea here is to precede the trailing EVERYONE@ ALLOW entry by an ++ * additional @who ALLOW entry, but with the following optimizations: ++ * (1) we don't bother setting any flags in the new @who ALLOW entry ++ * that has already been allowed or denied by a previous @who entry, (2) ++ * we merge the new @who entry with a previous @who entry if there is ++ * such a previous @who entry and there are no intervening DENY entries ++ * with mask flags that overlap the flags we care about. ++ */ ++static int ++__nfs4acl_propagate_everyone(struct nfs4acl_alloc *x, struct nfs4ace *who, ++ unsigned int allow) ++{ ++ struct nfs4ace *allow_last = NULL, *ace; ++ ++ /* Remove the mask flags from allow that are already determined for ++ this who value, and figure out if there is an ALLOW entry for ++ this who value that is "reachable" from the trailing EVERYONE@ ++ ALLOW ACE. */ ++ nfs4acl_for_each_entry(ace, x->acl) { ++ if (nfs4ace_is_inherit_only(ace)) ++ continue; ++ if (nfs4ace_is_allow(ace)) { ++ if (nfs4ace_is_same_who(ace, who)) { ++ allow &= ~ace->e_mask; ++ allow_last = ace; ++ } ++ } else if (nfs4ace_is_deny(ace)) { ++ if (nfs4ace_is_same_who(ace, who)) ++ allow &= ~ace->e_mask; ++ if (allow & ace->e_mask) ++ allow_last = NULL; ++ } ++ } ++ ++ if (allow) { ++ if (allow_last) ++ return nfs4ace_change_mask(x, &allow_last, ++ allow_last->e_mask | allow); ++ else { ++ struct nfs4ace who_copy; ++ ++ ace = x->acl->a_entries + x->acl->a_count - 1; ++ memcpy(&who_copy, who, sizeof(struct nfs4ace)); ++ if (nfs4acl_insert_entry(x, &ace)) ++ return -1; ++ memcpy(ace, &who_copy, sizeof(struct nfs4ace)); ++ ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE; ++ nfs4ace_clear_inheritance_flags(ace); ++ ace->e_mask = allow; ++ } ++ } ++ return 0; ++} ++ ++/** ++ * nfs4acl_propagate_everyone - propagate everyone@ mask flags up the acl ++ * @x: acl and number of allocated entries ++ * ++ * Make sure for owner@, group@, and all other users, groups, and ++ * special identifiers 
that they are allowed or denied all permissions ++ * that are granted be the trailing everyone@ acl entry. If they are ++ * not, try to add the missing permissions to existing allow acl entries ++ * for those users, or introduce additional acl entries if that is not ++ * possible. ++ * ++ * We do this so that no mask flags will get lost when finally applying ++ * the file masks to the acl entries: otherwise, with an other file mask ++ * that is more restrictive than the owner and/or group file mask, mask ++ * flags that were allowed to processes in the owner and group classes ++ * and that the other mask denies would be lost. For example, the ++ * following two acls show the problem when mode 0664 is applied to ++ * them: ++ * ++ * masking without propagation (wrong) ++ * =========================================================== ++ * joe:r::allow => joe:r::allow ++ * everyone@:rwx::allow => everyone@:r::allow ++ * ----------------------------------------------------------- ++ * joe:w::deny => joe:w::deny ++ * everyone@:rwx::allow everyone@:r::allow ++ * ++ * Note that the permissions of joe end up being more restrictive than ++ * what the acl would allow when first computing the allowed flags and ++ * then applying the respective mask. With propagation of permissions, ++ * we get: ++ * ++ * masking after propagation (correct) ++ * =========================================================== ++ * joe:r::allow => joe:rw::allow ++ * owner@:rw::allow ++ * group@:rw::allow ++ * everyone@:rwx::allow everyone@:r::allow ++ * ----------------------------------------------------------- ++ * joe:w::deny => owner@:x::deny ++ * joe:w::deny ++ * owner@:rw::allow ++ * owner@:rw::allow ++ * joe:r::allow ++ * everyone@:rwx::allow everyone@:r::allow ++ * ++ * The examples show the acls that would result from propagation with no ++ * masking performed. 
In fact, we do apply the respective mask to the ++ * acl entries before computing the propagation because this will save ++ * us from adding acl entries that would end up with empty mask fields ++ * after applying the masks. ++ * ++ * It is ensured that no more than one entry will be inserted for each ++ * who value, no matter how many entries each who value has already. ++ */ ++static int ++nfs4acl_propagate_everyone(struct nfs4acl_alloc *x) ++{ ++ int write_through = (x->acl->a_flags & ACL4_WRITE_THROUGH); ++ struct nfs4ace who = { .e_flags = ACE4_SPECIAL_WHO }; ++ struct nfs4ace *ace; ++ unsigned int owner_allow, group_allow; ++ int retval; ++ ++ if (!((x->acl->a_owner_mask | x->acl->a_group_mask) & ++ ~x->acl->a_other_mask)) ++ return 0; ++ if (!x->acl->a_count) ++ return 0; ++ ace = x->acl->a_entries + x->acl->a_count - 1; ++ if (nfs4ace_is_inherit_only(ace) || !nfs4ace_is_everyone(ace)) ++ return 0; ++ if (!(ace->e_mask & ~x->acl->a_other_mask)) { ++ /* None of the allowed permissions will get masked. */ ++ return 0; ++ } ++ owner_allow = ace->e_mask & x->acl->a_owner_mask; ++ group_allow = ace->e_mask & x->acl->a_group_mask; ++ ++ /* Propagate everyone@ permissions through to owner@. */ ++ if (owner_allow && !write_through && ++ (x->acl->a_owner_mask & ~x->acl->a_other_mask)) { ++ who.u.e_who = nfs4ace_owner_who; ++ retval = __nfs4acl_propagate_everyone(x, &who, owner_allow); ++ if (retval) ++ return -1; ++ } ++ ++ if (group_allow && (x->acl->a_group_mask & ~x->acl->a_other_mask)) { ++ int n; ++ ++ if (!write_through) { ++ /* Propagate everyone@ permissions through to group@. */ ++ who.u.e_who = nfs4ace_group_who; ++ retval = __nfs4acl_propagate_everyone(x, &who, ++ group_allow); ++ if (retval) ++ return -1; ++ } ++ ++ /* Start from the entry before the trailing EVERYONE@ ALLOW ++ entry. We will not hit EVERYONE@ entries in the loop. 
*/ ++ for (n = x->acl->a_count - 2; n != -1; n--) { ++ ace = x->acl->a_entries + n; ++ ++ if (nfs4ace_is_inherit_only(ace) || ++ nfs4ace_is_owner(ace) || ++ nfs4ace_is_group(ace)) ++ continue; ++ if (nfs4ace_is_allow(ace) || nfs4ace_is_deny(ace)) { ++ /* Any inserted entry will end up below the ++ current entry. */ ++ retval = __nfs4acl_propagate_everyone(x, ace, ++ group_allow); ++ if (retval) ++ return -1; ++ } ++ } ++ } ++ return 0; ++} ++ ++/** ++ * __nfs4acl_apply_masks - apply the masks to the acl entries ++ * @x: acl and number of allocated entries ++ * ++ * Apply the owner file mask to owner@ entries, the intersection of the ++ * group and other file masks to everyone@ entries, and the group file ++ * mask to all other entries. ++ */ ++static int ++__nfs4acl_apply_masks(struct nfs4acl_alloc *x) ++{ ++ struct nfs4ace *ace; ++ ++ nfs4acl_for_each_entry(ace, x->acl) { ++ unsigned int mask; ++ ++ if (nfs4ace_is_inherit_only(ace) || !nfs4ace_is_allow(ace)) ++ continue; ++ if (nfs4ace_is_owner(ace)) ++ mask = x->acl->a_owner_mask; ++ else if (nfs4ace_is_everyone(ace)) ++ mask = x->acl->a_other_mask; ++ else ++ mask = x->acl->a_group_mask; ++ if (nfs4ace_change_mask(x, &ace, ace->e_mask & mask)) ++ return -1; ++ } ++ return 0; ++} ++ ++/** ++ * nfs4acl_max_allowed - maximum mask flags that anybody is allowed ++ */ ++static unsigned int ++nfs4acl_max_allowed(struct nfs4acl *acl) ++{ ++ struct nfs4ace *ace; ++ unsigned int allowed = 0; ++ ++ nfs4acl_for_each_entry_reverse(ace, acl) { ++ if (nfs4ace_is_inherit_only(ace)) ++ continue; ++ if (nfs4ace_is_allow(ace)) ++ allowed |= ace->e_mask; ++ else if (nfs4ace_is_deny(ace)) { ++ if (nfs4ace_is_everyone(ace)) ++ allowed &= ~ace->e_mask; ++ } ++ } ++ return allowed; ++} ++ ++/** ++ * nfs4acl_isolate_owner_class - limit the owner class to the owner file mask ++ * @x: acl and number of allocated entries ++ * ++ * Make sure the owner class (owner@) is granted no more than the owner ++ * mask by first checking which 
permissions anyone is granted, and then ++ * denying owner@ all permissions beyond that. ++ */ ++static int ++nfs4acl_isolate_owner_class(struct nfs4acl_alloc *x) ++{ ++ struct nfs4ace *ace; ++ unsigned int allowed = 0; ++ ++ allowed = nfs4acl_max_allowed(x->acl); ++ if (allowed & ~x->acl->a_owner_mask) { ++ /* Figure out if we can update an existig OWNER@ DENY entry. */ ++ nfs4acl_for_each_entry(ace, x->acl) { ++ if (nfs4ace_is_inherit_only(ace)) ++ continue; ++ if (nfs4ace_is_deny(ace)) { ++ if (nfs4ace_is_owner(ace)) ++ break; ++ } else if (nfs4ace_is_allow(ace)) { ++ ace = x->acl->a_entries + x->acl->a_count; ++ break; ++ } ++ } ++ if (ace != x->acl->a_entries + x->acl->a_count) { ++ if (nfs4ace_change_mask(x, &ace, ace->e_mask | ++ (allowed & ~x->acl->a_owner_mask))) ++ return -1; ++ } else { ++ /* Insert an owner@ deny entry at the front. */ ++ ace = x->acl->a_entries; ++ if (nfs4acl_insert_entry(x, &ace)) ++ return -1; ++ ace->e_type = ACE4_ACCESS_DENIED_ACE_TYPE; ++ ace->e_flags = ACE4_SPECIAL_WHO; ++ ace->e_mask = allowed & ~x->acl->a_owner_mask; ++ ace->u.e_who = nfs4ace_owner_who; ++ } ++ } ++ return 0; ++} ++ ++/** ++ * __nfs4acl_isolate_who - isolate entry from EVERYONE@ ALLOW entry ++ * @x: acl and number of allocated entries ++ * @who: identifier to isolate ++ * @deny: mask flags this identifier should not be allowed ++ * ++ * Make sure that @who is not allowed any mask flags in @deny by checking ++ * which mask flags this identifier is allowed, and adding excess allowed ++ * mask flags to an existing DENY entry before the trailing EVERYONE@ ALLOW ++ * entry, or inserting such an entry. ++ */ ++static int ++__nfs4acl_isolate_who(struct nfs4acl_alloc *x, struct nfs4ace *who, ++ unsigned int deny) ++{ ++ struct nfs4ace *ace; ++ unsigned int allowed = 0, n; ++ ++ /* Compute the mask flags granted to this who value. 
*/ ++ nfs4acl_for_each_entry_reverse(ace, x->acl) { ++ if (nfs4ace_is_inherit_only(ace)) ++ continue; ++ if (nfs4ace_is_same_who(ace, who)) { ++ if (nfs4ace_is_allow(ace)) ++ allowed |= ace->e_mask; ++ else if (nfs4ace_is_deny(ace)) ++ allowed &= ~ace->e_mask; ++ deny &= ~ace->e_mask; ++ } ++ } ++ if (!deny) ++ return 0; ++ ++ /* Figure out if we can update an existig DENY entry. Start ++ from the entry before the trailing EVERYONE@ ALLOW entry. We ++ will not hit EVERYONE@ entries in the loop. */ ++ for (n = x->acl->a_count - 2; n != -1; n--) { ++ ace = x->acl->a_entries + n; ++ if (nfs4ace_is_inherit_only(ace)) ++ continue; ++ if (nfs4ace_is_deny(ace)) { ++ if (nfs4ace_is_same_who(ace, who)) ++ break; ++ } else if (nfs4ace_is_allow(ace) && ++ (ace->e_mask & deny)) { ++ n = -1; ++ break; ++ } ++ } ++ if (n != -1) { ++ if (nfs4ace_change_mask(x, &ace, ace->e_mask | deny)) ++ return -1; ++ } else { ++ /* Insert a eny entry before the trailing EVERYONE@ DENY ++ entry. */ ++ struct nfs4ace who_copy; ++ ++ ace = x->acl->a_entries + x->acl->a_count - 1; ++ memcpy(&who_copy, who, sizeof(struct nfs4ace)); ++ if (nfs4acl_insert_entry(x, &ace)) ++ return -1; ++ memcpy(ace, &who_copy, sizeof(struct nfs4ace)); ++ ace->e_type = ACE4_ACCESS_DENIED_ACE_TYPE; ++ nfs4ace_clear_inheritance_flags(ace); ++ ace->e_mask = deny; ++ } ++ return 0; ++} ++ ++/** ++ * nfs4acl_isolate_group_class - limit the group class to the group file mask ++ * @x: acl and number of allocated entries ++ * ++ * Make sure the group class (all entries except owner@ and everyone@) is ++ * granted no more than the group mask by inserting DENY entries for group ++ * class entries where necessary. 
++ */ ++static int ++nfs4acl_isolate_group_class(struct nfs4acl_alloc *x) ++{ ++ struct nfs4ace who = { ++ .e_flags = ACE4_SPECIAL_WHO, ++ .u.e_who = nfs4ace_group_who, ++ }; ++ struct nfs4ace *ace; ++ unsigned int deny; ++ ++ if (!x->acl->a_count) ++ return 0; ++ ace = x->acl->a_entries + x->acl->a_count - 1; ++ if (nfs4ace_is_inherit_only(ace) || !nfs4ace_is_everyone(ace)) ++ return 0; ++ deny = ace->e_mask & ~x->acl->a_group_mask; ++ ++ if (deny) { ++ unsigned int n; ++ ++ if (__nfs4acl_isolate_who(x, &who, deny)) ++ return -1; ++ ++ /* Start from the entry before the trailing EVERYONE@ ALLOW ++ entry. We will not hit EVERYONE@ entries in the loop. */ ++ for (n = x->acl->a_count - 2; n != -1; n--) { ++ ace = x->acl->a_entries + n; ++ ++ if (nfs4ace_is_inherit_only(ace) || ++ nfs4ace_is_owner(ace) || ++ nfs4ace_is_group(ace)) ++ continue; ++ if (__nfs4acl_isolate_who(x, ace, deny)) ++ return -1; ++ } ++ } ++ return 0; ++} ++ ++/** ++ * __nfs4acl_write_through - grant the full masks to owner@, group@, everyone@ ++ * ++ * Make sure that owner, group@, and everyone@ are allowed the full mask ++ * permissions, and not only the permissions granted both by the acl and ++ * the masks. ++ */ ++static int ++__nfs4acl_write_through(struct nfs4acl_alloc *x) ++{ ++ struct nfs4ace *ace; ++ unsigned int allowed; ++ ++ /* Remove all owner@ and group@ ACEs: we re-insert them at the ++ top. */ ++ nfs4acl_for_each_entry(ace, x->acl) { ++ if (nfs4ace_is_inherit_only(ace)) ++ continue; ++ if ((nfs4ace_is_owner(ace) || nfs4ace_is_group(ace)) && ++ nfs4ace_change_mask(x, &ace, 0)) ++ return -1; ++ } ++ ++ /* Insert the everyone@ allow entry at the end, or update the ++ existing entry. 
*/ ++ allowed = x->acl->a_other_mask; ++ if (allowed & ~ACE4_POSIX_ALWAYS_ALLOWED) { ++ ace = x->acl->a_entries + x->acl->a_count - 1; ++ if (x->acl->a_count && nfs4ace_is_everyone(ace) && ++ !nfs4ace_is_inherit_only(ace)) { ++ if (nfs4ace_change_mask(x, &ace, allowed)) ++ return -1; ++ } else { ++ ace = x->acl->a_entries + x->acl->a_count; ++ if (nfs4acl_insert_entry(x, &ace)) ++ return -1; ++ ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE; ++ ace->e_flags = ACE4_SPECIAL_WHO; ++ ace->e_mask = allowed; ++ ace->u.e_who = nfs4ace_everyone_who; ++ } ++ } ++ ++ /* Compute the permissions that owner@ and group@ are already granted ++ though the everyone@ allow entry at the end. Note that the acl ++ contains no owner@ or group@ entries at this point. */ ++ allowed = 0; ++ nfs4acl_for_each_entry_reverse(ace, x->acl) { ++ if (nfs4ace_is_inherit_only(ace)) ++ continue; ++ if (nfs4ace_is_allow(ace)) { ++ if (nfs4ace_is_everyone(ace)) ++ allowed |= ace->e_mask; ++ } else if (nfs4ace_is_deny(ace)) ++ allowed &= ~ace->e_mask; ++ } ++ ++ /* Insert the appropriate group@ allow entry at the front. */ ++ if (x->acl->a_group_mask & ~allowed) { ++ ace = x->acl->a_entries; ++ if (nfs4acl_insert_entry(x, &ace)) ++ return -1; ++ ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE; ++ ace->e_flags = ACE4_SPECIAL_WHO; ++ ace->e_mask = x->acl->a_group_mask /*& ~allowed*/; ++ ace->u.e_who = nfs4ace_group_who; ++ } ++ ++ /* Insert the appropriate owner@ allow entry at the front. */ ++ if (x->acl->a_owner_mask & ~allowed) { ++ ace = x->acl->a_entries; ++ if (nfs4acl_insert_entry(x, &ace)) ++ return -1; ++ ace->e_type = ACE4_ACCESS_ALLOWED_ACE_TYPE; ++ ace->e_flags = ACE4_SPECIAL_WHO; ++ ace->e_mask = x->acl->a_owner_mask /*& ~allowed*/; ++ ace->u.e_who = nfs4ace_owner_who; ++ } ++ ++ /* Insert the appropriate owner@ deny entry at the front. 
*/ ++ allowed = nfs4acl_max_allowed(x->acl); ++ if (allowed & ~x->acl->a_owner_mask) { ++ nfs4acl_for_each_entry(ace, x->acl) { ++ if (nfs4ace_is_inherit_only(ace)) ++ continue; ++ if (nfs4ace_is_allow(ace)) { ++ ace = x->acl->a_entries + x->acl->a_count; ++ break; ++ } ++ if (nfs4ace_is_deny(ace) && nfs4ace_is_owner(ace)) ++ break; ++ } ++ if (ace != x->acl->a_entries + x->acl->a_count) { ++ if (nfs4ace_change_mask(x, &ace, ace->e_mask | ++ (allowed & ~x->acl->a_owner_mask))) ++ return -1; ++ } else { ++ ace = x->acl->a_entries; ++ if (nfs4acl_insert_entry(x, &ace)) ++ return -1; ++ ace->e_type = ACE4_ACCESS_DENIED_ACE_TYPE; ++ ace->e_flags = ACE4_SPECIAL_WHO; ++ ace->e_mask = allowed & ~x->acl->a_owner_mask; ++ ace->u.e_who = nfs4ace_owner_who; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * nfs4acl_apply_masks - apply the masks to the acl ++ * ++ * Apply the masks so that the acl allows no more flags than the ++ * intersection between the flags that the original acl allows and the ++ * mask matching the process. ++ * ++ * Note: this algorithm may push the number of entries in the acl above ++ * ACL4_XATTR_MAX_COUNT, so a read-modify-write cycle would fail. 
++ */ ++int ++nfs4acl_apply_masks(struct nfs4acl **acl) ++{ ++ struct nfs4acl_alloc x = { ++ .acl = *acl, ++ .count = (*acl)->a_count, ++ }; ++ int retval = 0; ++ ++ if (nfs4acl_move_everyone_aces_down(&x) || ++ nfs4acl_propagate_everyone(&x) || ++ __nfs4acl_apply_masks(&x) || ++ nfs4acl_isolate_owner_class(&x) || ++ nfs4acl_isolate_group_class(&x)) ++ retval = -ENOMEM; ++ ++ *acl = x.acl; ++ return retval; ++} ++EXPORT_SYMBOL(nfs4acl_apply_masks); ++ ++int nfs4acl_write_through(struct nfs4acl **acl) ++{ ++ struct nfs4acl_alloc x = { ++ .acl = *acl, ++ .count = (*acl)->a_count, ++ }; ++ int retval = 0; ++ ++ if (!((*acl)->a_flags & ACL4_WRITE_THROUGH)) ++ goto out; ++ ++ if (nfs4acl_move_everyone_aces_down(&x) || ++ nfs4acl_propagate_everyone(&x) || ++ __nfs4acl_write_through(&x)) ++ retval = -ENOMEM; ++ ++ *acl = x.acl; ++out: ++ return retval; ++} +--- /dev/null ++++ b/fs/nfs4acl_xattr.c +@@ -0,0 +1,146 @@ ++/* ++ * Copyright (C) 2006 Andreas Gruenbacher ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2, or (at your option) any ++ * later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++ ++struct nfs4acl * ++nfs4acl_from_xattr(const void *value, size_t size) ++{ ++ const struct nfs4acl_xattr *xattr_acl = value; ++ const struct nfs4ace_xattr *xattr_ace = (void *)(xattr_acl + 1); ++ struct nfs4acl *acl; ++ struct nfs4ace *ace; ++ int count; ++ ++ if (size < sizeof(struct nfs4acl_xattr) || ++ xattr_acl->a_version != ACL4_XATTR_VERSION || ++ (xattr_acl->a_flags & ~ACL4_VALID_FLAGS)) ++ return ERR_PTR(-EINVAL); ++ ++ count = be16_to_cpu(xattr_acl->a_count); ++ if (count > ACL4_XATTR_MAX_COUNT) ++ return ERR_PTR(-EINVAL); ++ ++ acl = nfs4acl_alloc(count); ++ if (!acl) ++ return ERR_PTR(-ENOMEM); ++ ++ acl->a_flags = xattr_acl->a_flags; ++ acl->a_owner_mask = be32_to_cpu(xattr_acl->a_owner_mask); ++ if (acl->a_owner_mask & ~ACE4_VALID_MASK) ++ goto fail_einval; ++ acl->a_group_mask = be32_to_cpu(xattr_acl->a_group_mask); ++ if (acl->a_group_mask & ~ACE4_VALID_MASK) ++ goto fail_einval; ++ acl->a_other_mask = be32_to_cpu(xattr_acl->a_other_mask); ++ if (acl->a_other_mask & ~ACE4_VALID_MASK) ++ goto fail_einval; ++ ++ nfs4acl_for_each_entry(ace, acl) { ++ const char *who = (void *)(xattr_ace + 1), *end; ++ ssize_t used = (void *)who - value; ++ ++ if (used > size) ++ goto fail_einval; ++ end = memchr(who, 0, size - used); ++ if (!end) ++ goto fail_einval; ++ ++ ace->e_type = be16_to_cpu(xattr_ace->e_type); ++ ace->e_flags = be16_to_cpu(xattr_ace->e_flags); ++ ace->e_mask = be32_to_cpu(xattr_ace->e_mask); ++ ace->u.e_id = be32_to_cpu(xattr_ace->e_id); ++ ++ if (ace->e_flags & ~ACE4_VALID_FLAGS) { ++ memset(ace, 0, sizeof(struct nfs4ace)); ++ goto fail_einval; ++ } ++ if (ace->e_type > ACE4_ACCESS_DENIED_ACE_TYPE || ++ (ace->e_mask & ~ACE4_VALID_MASK)) ++ goto fail_einval; ++ ++ if (who == end) { ++ if (ace->u.e_id == -1) ++ goto fail_einval; /* uid/gid needed */ ++ } else if (nfs4ace_set_who(ace, who)) ++ goto fail_einval; ++ ++ xattr_ace = (void *)who + ALIGN(end 
- who + 1, 4); ++ } ++ ++ return acl; ++ ++fail_einval: ++ nfs4acl_put(acl); ++ return ERR_PTR(-EINVAL); ++} ++EXPORT_SYMBOL(nfs4acl_from_xattr); ++ ++size_t ++nfs4acl_xattr_size(const struct nfs4acl *acl) ++{ ++ size_t size = sizeof(struct nfs4acl_xattr); ++ const struct nfs4ace *ace; ++ ++ nfs4acl_for_each_entry(ace, acl) { ++ size += sizeof(struct nfs4ace_xattr) + ++ (nfs4ace_is_unix_id(ace) ? 4 : ++ ALIGN(strlen(ace->u.e_who) + 1, 4)); ++ } ++ return size; ++} ++EXPORT_SYMBOL(nfs4acl_xattr_size); ++ ++void ++nfs4acl_to_xattr(const struct nfs4acl *acl, void *buffer) ++{ ++ struct nfs4acl_xattr *xattr_acl = buffer; ++ struct nfs4ace_xattr *xattr_ace; ++ const struct nfs4ace *ace; ++ ++ xattr_acl->a_version = ACL4_XATTR_VERSION; ++ xattr_acl->a_flags = acl->a_flags; ++ xattr_acl->a_count = cpu_to_be16(acl->a_count); ++ ++ xattr_acl->a_owner_mask = cpu_to_be32(acl->a_owner_mask); ++ xattr_acl->a_group_mask = cpu_to_be32(acl->a_group_mask); ++ xattr_acl->a_other_mask = cpu_to_be32(acl->a_other_mask); ++ ++ xattr_ace = (void *)(xattr_acl + 1); ++ nfs4acl_for_each_entry(ace, acl) { ++ xattr_ace->e_type = cpu_to_be16(ace->e_type); ++ xattr_ace->e_flags = cpu_to_be16(ace->e_flags & ++ ACE4_VALID_FLAGS); ++ xattr_ace->e_mask = cpu_to_be32(ace->e_mask); ++ if (nfs4ace_is_unix_id(ace)) { ++ xattr_ace->e_id = cpu_to_be32(ace->u.e_id); ++ memset(xattr_ace->e_who, 0, 4); ++ xattr_ace = (void *)xattr_ace->e_who + 4; ++ } else { ++ int sz = ALIGN(strlen(ace->u.e_who) + 1, 4); ++ ++ xattr_ace->e_id = cpu_to_be32(-1); ++ memset(xattr_ace->e_who + sz - 4, 0, 4); ++ strcpy(xattr_ace->e_who, ace->u.e_who); ++ xattr_ace = (void *)xattr_ace->e_who + sz; ++ } ++ } ++} ++EXPORT_SYMBOL(nfs4acl_to_xattr); +--- /dev/null ++++ b/include/linux/nfs4acl.h +@@ -0,0 +1,205 @@ ++#ifndef __NFS4ACL_H ++#define __NFS4ACL_H ++ ++struct nfs4ace { ++ unsigned short e_type; ++ unsigned short e_flags; ++ unsigned int e_mask; ++ union { ++ unsigned int e_id; ++ const char *e_who; ++ } u; ++}; ++ ++struct 
nfs4acl { ++ atomic_t a_refcount; ++ unsigned int a_owner_mask; ++ unsigned int a_group_mask; ++ unsigned int a_other_mask; ++ unsigned short a_count; ++ unsigned short a_flags; ++ struct nfs4ace a_entries[0]; ++}; ++ ++#define nfs4acl_for_each_entry(_ace, _acl) \ ++ for (_ace = _acl->a_entries; \ ++ _ace != _acl->a_entries + _acl->a_count; \ ++ _ace++) ++ ++#define nfs4acl_for_each_entry_reverse(_ace, _acl) \ ++ for (_ace = _acl->a_entries + _acl->a_count - 1; \ ++ _ace != _acl->a_entries - 1; \ ++ _ace--) ++ ++/* a_flags values */ ++#define ACL4_WRITE_THROUGH 0x40 ++ ++#define ACL4_VALID_FLAGS \ ++ ACL4_WRITE_THROUGH ++ ++/* e_type values */ ++#define ACE4_ACCESS_ALLOWED_ACE_TYPE 0x0000 ++#define ACE4_ACCESS_DENIED_ACE_TYPE 0x0001 ++/*#define ACE4_SYSTEM_AUDIT_ACE_TYPE 0x0002*/ ++/*#define ACE4_SYSTEM_ALARM_ACE_TYPE 0x0003*/ ++ ++/* e_flags bitflags */ ++#define ACE4_FILE_INHERIT_ACE 0x0001 ++#define ACE4_DIRECTORY_INHERIT_ACE 0x0002 ++#define ACE4_NO_PROPAGATE_INHERIT_ACE 0x0004 ++#define ACE4_INHERIT_ONLY_ACE 0x0008 ++/*#define ACE4_SUCCESSFUL_ACCESS_ACE_FLAG 0x0010*/ ++/*#define ACE4_FAILED_ACCESS_ACE_FLAG 0x0020*/ ++#define ACE4_IDENTIFIER_GROUP 0x0040 ++#define ACE4_SPECIAL_WHO 0x4000 /* in-memory representation only */ ++ ++#define ACE4_VALID_FLAGS ( \ ++ ACE4_FILE_INHERIT_ACE | \ ++ ACE4_DIRECTORY_INHERIT_ACE | \ ++ ACE4_NO_PROPAGATE_INHERIT_ACE | \ ++ ACE4_INHERIT_ONLY_ACE | \ ++ ACE4_IDENTIFIER_GROUP ) ++ ++/* e_mask bitflags */ ++#define ACE4_READ_DATA 0x00000001 ++#define ACE4_LIST_DIRECTORY 0x00000001 ++#define ACE4_WRITE_DATA 0x00000002 ++#define ACE4_ADD_FILE 0x00000002 ++#define ACE4_APPEND_DATA 0x00000004 ++#define ACE4_ADD_SUBDIRECTORY 0x00000004 ++#define ACE4_READ_NAMED_ATTRS 0x00000008 ++#define ACE4_WRITE_NAMED_ATTRS 0x00000010 ++#define ACE4_EXECUTE 0x00000020 ++#define ACE4_DELETE_CHILD 0x00000040 ++#define ACE4_READ_ATTRIBUTES 0x00000080 ++#define ACE4_WRITE_ATTRIBUTES 0x00000100 ++#define ACE4_DELETE 0x00010000 ++#define ACE4_READ_ACL 
0x00020000 ++#define ACE4_WRITE_ACL 0x00040000 ++#define ACE4_WRITE_OWNER 0x00080000 ++#define ACE4_SYNCHRONIZE 0x00100000 ++ ++#define ACE4_VALID_MASK ( \ ++ ACE4_READ_DATA | ACE4_LIST_DIRECTORY | \ ++ ACE4_WRITE_DATA | ACE4_ADD_FILE | \ ++ ACE4_APPEND_DATA | ACE4_ADD_SUBDIRECTORY | \ ++ ACE4_READ_NAMED_ATTRS | \ ++ ACE4_WRITE_NAMED_ATTRS | \ ++ ACE4_EXECUTE | \ ++ ACE4_DELETE_CHILD | \ ++ ACE4_READ_ATTRIBUTES | \ ++ ACE4_WRITE_ATTRIBUTES | \ ++ ACE4_DELETE | \ ++ ACE4_READ_ACL | \ ++ ACE4_WRITE_ACL | \ ++ ACE4_WRITE_OWNER | \ ++ ACE4_SYNCHRONIZE ) ++ ++#define ACE4_POSIX_ALWAYS_ALLOWED ( \ ++ ACE4_SYNCHRONIZE | \ ++ ACE4_READ_ATTRIBUTES | \ ++ ACE4_READ_ACL ) ++/* ++ * Duplicate an NFS4ACL handle. ++ */ ++static inline struct nfs4acl * ++nfs4acl_get(struct nfs4acl *acl) ++{ ++ if (acl) ++ atomic_inc(&acl->a_refcount); ++ return acl; ++} ++ ++/* ++ * Free an NFS4ACL handle ++ */ ++static inline void ++nfs4acl_put(struct nfs4acl *acl) ++{ ++ if (acl && atomic_dec_and_test(&acl->a_refcount)) ++ kfree(acl); ++} ++ ++/* Special e_who identifiers: we use these pointer values in comparisons ++ instead of strcmp for efficiency. 
*/ ++ ++extern const char nfs4ace_owner_who[]; ++extern const char nfs4ace_group_who[]; ++extern const char nfs4ace_everyone_who[]; ++ ++static inline int ++nfs4ace_is_owner(const struct nfs4ace *ace) ++{ ++ return (ace->e_flags & ACE4_SPECIAL_WHO) && ++ ace->u.e_who == nfs4ace_owner_who; ++} ++ ++static inline int ++nfs4ace_is_group(const struct nfs4ace *ace) ++{ ++ return (ace->e_flags & ACE4_SPECIAL_WHO) && ++ ace->u.e_who == nfs4ace_group_who; ++} ++ ++static inline int ++nfs4ace_is_everyone(const struct nfs4ace *ace) ++{ ++ return (ace->e_flags & ACE4_SPECIAL_WHO) && ++ ace->u.e_who == nfs4ace_everyone_who; ++} ++ ++static inline int ++nfs4ace_is_unix_id(const struct nfs4ace *ace) ++{ ++ return !(ace->e_flags & ACE4_SPECIAL_WHO); ++} ++ ++static inline int ++nfs4ace_is_inherit_only(const struct nfs4ace *ace) ++{ ++ return ace->e_flags & ACE4_INHERIT_ONLY_ACE; ++} ++ ++static inline int ++nfs4ace_is_inheritable(const struct nfs4ace *ace) ++{ ++ return ace->e_flags & (ACE4_FILE_INHERIT_ACE | ++ ACE4_DIRECTORY_INHERIT_ACE); ++} ++ ++static inline void ++nfs4ace_clear_inheritance_flags(struct nfs4ace *ace) ++{ ++ ace->e_flags &= ~(ACE4_FILE_INHERIT_ACE | ++ ACE4_DIRECTORY_INHERIT_ACE | ++ ACE4_NO_PROPAGATE_INHERIT_ACE | ++ ACE4_INHERIT_ONLY_ACE); ++} ++ ++static inline int ++nfs4ace_is_allow(const struct nfs4ace *ace) ++{ ++ return ace->e_type == ACE4_ACCESS_ALLOWED_ACE_TYPE; ++} ++ ++static inline int ++nfs4ace_is_deny(const struct nfs4ace *ace) ++{ ++ return ace->e_type == ACE4_ACCESS_DENIED_ACE_TYPE; ++} ++ ++extern struct nfs4acl *nfs4acl_alloc(int count); ++extern struct nfs4acl *nfs4acl_clone(const struct nfs4acl *acl); ++ ++extern unsigned int nfs4acl_want_to_mask(int want); ++extern int nfs4acl_permission(struct inode *, const struct nfs4acl *, unsigned int); ++extern int nfs4acl_generic_permission(struct inode *, unsigned int); ++extern int nfs4ace_is_same_who(const struct nfs4ace *, const struct nfs4ace *); ++extern int nfs4ace_set_who(struct nfs4ace 
*ace, const char *who); ++extern struct nfs4acl *nfs4acl_inherit(const struct nfs4acl *, mode_t); ++extern int nfs4acl_masks_to_mode(const struct nfs4acl *); ++extern struct nfs4acl *nfs4acl_chmod(struct nfs4acl *, mode_t); ++extern int nfs4acl_apply_masks(struct nfs4acl **acl); ++extern int nfs4acl_write_through(struct nfs4acl **acl); ++ ++#endif /* __NFS4ACL_H */ +--- /dev/null ++++ b/include/linux/nfs4acl_xattr.h +@@ -0,0 +1,32 @@ ++#ifndef __NFS4ACL_XATTR_H ++#define __NFS4ACL_XATTR_H ++ ++#include ++ ++#define NFS4ACL_XATTR "system.nfs4acl" ++ ++struct nfs4ace_xattr { ++ __be16 e_type; ++ __be16 e_flags; ++ __be32 e_mask; ++ __be32 e_id; ++ char e_who[0]; ++}; ++ ++struct nfs4acl_xattr { ++ unsigned char a_version; ++ unsigned char a_flags; ++ __be16 a_count; ++ __be32 a_owner_mask; ++ __be32 a_group_mask; ++ __be32 a_other_mask; ++}; ++ ++#define ACL4_XATTR_VERSION 0 ++#define ACL4_XATTR_MAX_COUNT 1024 ++ ++extern struct nfs4acl *nfs4acl_from_xattr(const void *, size_t); ++extern size_t nfs4acl_xattr_size(const struct nfs4acl *acl); ++extern void nfs4acl_to_xattr(const struct nfs4acl *, void *); ++ ++#endif /* __NFS4ACL_XATTR_H */ diff --git a/src/patches/suse-2.6.27.25/patches.suse/nfs4acl-ext3.diff b/src/patches/suse-2.6.27.25/patches.suse/nfs4acl-ext3.diff new file mode 100644 index 0000000000..938643e656 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/nfs4acl-ext3.diff @@ -0,0 +1,906 @@ +From: Andreas Gruenbacher +Subject: NFSv4 ACLs for ext3 + +With the acl=nfs4 mount option, ext3 will use NFSv4 ACLs instead of +POSIX ACLs. See http://www.suse.de/~agruen/nfs4acl/ for some +documentation and examples. 
+ +Signed-off-by: Andreas Gruenbacher + +--- + fs/Kconfig | 7 + fs/ext3/Makefile | 1 + fs/ext3/acl.c | 8 + fs/ext3/acl.h | 4 + fs/ext3/file.c | 4 + fs/ext3/ialloc.c | 6 + fs/ext3/inode.c | 73 ++++++++- + fs/ext3/namei.c | 15 + + fs/ext3/namei.h | 1 + fs/ext3/nfs4acl.c | 370 ++++++++++++++++++++++++++++++++++++++++++++++ + fs/ext3/nfs4acl.h | 36 ++++ + fs/ext3/super.c | 60 +++++-- + fs/ext3/xattr.c | 9 + + fs/ext3/xattr.h | 5 + include/linux/ext3_fs.h | 1 + include/linux/ext3_fs_i.h | 3 + 16 files changed, 577 insertions(+), 26 deletions(-) + +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -124,6 +124,13 @@ config EXT3_FS_POSIX_ACL + + If you don't know what Access Control Lists are, say N + ++config EXT3_FS_NFS4ACL ++ bool "Native NFSv4 ACLs (EXPERIMENTAL)" ++ depends on EXT3_FS_XATTR && EXPERIMENTAL ++ select FS_NFS4ACL ++ help ++ Allow to use NFSv4 ACLs instead of POSIX ACLs. ++ + config EXT3_FS_SECURITY + bool "Ext3 Security Labels" + depends on EXT3_FS_XATTR +--- a/fs/ext3/Makefile ++++ b/fs/ext3/Makefile +@@ -10,3 +10,4 @@ ext3-y := balloc.o bitmap.o dir.o file.o + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o + ext3-$(CONFIG_EXT3_FS_SECURITY) += xattr_security.o ++ext3-$(CONFIG_EXT3_FS_NFS4ACL) += nfs4acl.o +--- a/fs/ext3/acl.c ++++ b/fs/ext3/acl.c +@@ -282,7 +282,7 @@ ext3_set_acl(handle_t *handle, struct in + return error; + } + +-static int ++int + ext3_check_acl(struct inode *inode, int mask) + { + struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); +@@ -298,12 +298,6 @@ ext3_check_acl(struct inode *inode, int + return -EAGAIN; + } + +-int +-ext3_permission(struct inode *inode, int mask) +-{ +- return generic_permission(inode, mask, ext3_check_acl); +-} +- + /* + * Initialize the ACLs of a new inode. Called from ext3_new_inode. 
+ * +--- a/fs/ext3/acl.h ++++ b/fs/ext3/acl.h +@@ -58,13 +58,13 @@ static inline int ext3_acl_count(size_t + #define EXT3_ACL_NOT_CACHED ((void *)-1) + + /* acl.c */ +-extern int ext3_permission (struct inode *, int); ++extern int ext3_check_acl (struct inode *, int); + extern int ext3_acl_chmod (struct inode *); + extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); + + #else /* CONFIG_EXT3_FS_POSIX_ACL */ + #include +-#define ext3_permission NULL ++#define ext3_check_acl NULL + + static inline int + ext3_acl_chmod(struct inode *inode) +--- a/fs/ext3/file.c ++++ b/fs/ext3/file.c +@@ -23,8 +23,10 @@ + #include + #include + #include ++#include "namei.h" + #include "xattr.h" + #include "acl.h" ++#include "nfs4acl.h" + + /* + * Called when an inode is released. Note that this is different +@@ -134,5 +136,7 @@ const struct inode_operations ext3_file_ + .removexattr = generic_removexattr, + #endif + .permission = ext3_permission, ++ .may_create = ext3_may_create, ++ .may_delete = ext3_may_delete, + }; + +--- a/fs/ext3/ialloc.c ++++ b/fs/ext3/ialloc.c +@@ -28,6 +28,7 @@ + + #include "xattr.h" + #include "acl.h" ++#include "nfs4acl.h" + + /* + * ialloc.c contains the inodes allocation and deallocation routines +@@ -595,7 +596,10 @@ got: + goto fail_drop; + } + +- err = ext3_init_acl(handle, inode, dir); ++ if (test_opt(sb, NFS4ACL)) ++ err = ext3_nfs4acl_init(handle, inode, dir); ++ else ++ err = ext3_init_acl(handle, inode, dir); + if (err) + goto fail_free_drop; + +--- a/fs/ext3/inode.c ++++ b/fs/ext3/inode.c +@@ -38,6 +38,7 @@ + #include + #include "xattr.h" + #include "acl.h" ++#include "nfs4acl.h" + + static int ext3_writepage_trans_blocks(struct inode *inode); + +@@ -2684,6 +2685,9 @@ struct inode *ext3_iget(struct super_blo + ei->i_acl = EXT3_ACL_NOT_CACHED; + ei->i_default_acl = EXT3_ACL_NOT_CACHED; + #endif ++#ifdef CONFIG_EXT3_FS_NFS4ACL ++ ei->i_nfs4acl = EXT3_NFS4ACL_NOT_CACHED; ++#endif + ei->i_block_alloc_info = NULL; + + ret = 
__ext3_get_inode_loc(inode, &iloc, 0); +@@ -2983,6 +2987,65 @@ int ext3_write_inode(struct inode *inode + return ext3_force_commit(inode->i_sb); + } + ++#ifdef CONFIG_EXT3_FS_NFS4ACL ++static int ext3_inode_change_ok(struct inode *inode, struct iattr *attr) ++{ ++ unsigned int ia_valid = attr->ia_valid; ++ ++ if (!test_opt(inode->i_sb, NFS4ACL)) ++ return inode_change_ok(inode, attr); ++ ++ /* If force is set do it anyway. */ ++ if (ia_valid & ATTR_FORCE) ++ return 0; ++ ++ /* Make sure a caller can chown. */ ++ if ((ia_valid & ATTR_UID) && ++ (current->fsuid != inode->i_uid || ++ attr->ia_uid != inode->i_uid) && ++ (current->fsuid != attr->ia_uid || ++ ext3_nfs4acl_permission(inode, ACE4_WRITE_OWNER)) && ++ !capable(CAP_CHOWN)) ++ goto error; ++ ++ /* Make sure caller can chgrp. */ ++ if ((ia_valid & ATTR_GID)) { ++ int in_group = in_group_p(attr->ia_gid); ++ if ((current->fsuid != inode->i_uid || ++ (!in_group && attr->ia_gid != inode->i_gid)) && ++ (!in_group || ++ ext3_nfs4acl_permission(inode, ACE4_WRITE_OWNER)) && ++ !capable(CAP_CHOWN)) ++ goto error; ++ } ++ ++ /* Make sure a caller can chmod. */ ++ if (ia_valid & ATTR_MODE) { ++ if (current->fsuid != inode->i_uid && ++ ext3_nfs4acl_permission(inode, ACE4_WRITE_ACL) && ++ !capable(CAP_FOWNER)) ++ goto error; ++ /* Also check the setgid bit! */ ++ if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : ++ inode->i_gid) && !capable(CAP_FSETID)) ++ attr->ia_mode &= ~S_ISGID; ++ } ++ ++ /* Check for setting the inode time. 
*/ ++ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) { ++ if (current->fsuid != inode->i_uid && ++ ext3_nfs4acl_permission(inode, ACE4_WRITE_ATTRIBUTES) && ++ !capable(CAP_FOWNER)) ++ goto error; ++ } ++ return 0; ++error: ++ return -EPERM; ++} ++#else ++# define ext3_inode_change_ok inode_change_ok ++#endif ++ + /* + * ext3_setattr() + * +@@ -3006,7 +3069,7 @@ int ext3_setattr(struct dentry *dentry, + int error, rc = 0; + const unsigned int ia_valid = attr->ia_valid; + +- error = inode_change_ok(inode, attr); ++ error = ext3_inode_change_ok(inode, attr); + if (error) + return error; + +@@ -3063,8 +3126,12 @@ int ext3_setattr(struct dentry *dentry, + if (inode->i_nlink) + ext3_orphan_del(NULL, inode); + +- if (!rc && (ia_valid & ATTR_MODE)) +- rc = ext3_acl_chmod(inode); ++ if (!rc && (ia_valid & ATTR_MODE)) { ++ if (test_opt(inode->i_sb, NFS4ACL)) ++ rc = ext3_nfs4acl_chmod(inode); ++ else ++ rc = ext3_acl_chmod(inode); ++ } + + err_out: + ext3_std_error(inode->i_sb, error); +--- a/fs/ext3/namei.c ++++ b/fs/ext3/namei.c +@@ -40,6 +40,7 @@ + #include "namei.h" + #include "xattr.h" + #include "acl.h" ++#include "nfs4acl.h" + + /* + * define how far ahead to read directories while searching them. +@@ -2412,6 +2413,16 @@ end_rename: + return retval; + } + ++int ext3_permission(struct inode *inode, int mask) ++{ ++#ifdef CONFIG_EXT3_FS_NFS4ACL ++ if (test_opt(inode->i_sb, NFS4ACL)) ++ return ext3_nfs4acl_permission(inode, nfs4acl_want_to_mask(mask)); ++ else ++#endif ++ return generic_permission(inode, mask, ext3_check_acl); ++} ++ + /* + * directories can handle most operations... 
+ */ +@@ -2433,6 +2444,8 @@ const struct inode_operations ext3_dir_i + .removexattr = generic_removexattr, + #endif + .permission = ext3_permission, ++ .may_create = ext3_may_create, ++ .may_delete = ext3_may_delete, + }; + + const struct inode_operations ext3_special_inode_operations = { +@@ -2444,4 +2457,6 @@ const struct inode_operations ext3_speci + .removexattr = generic_removexattr, + #endif + .permission = ext3_permission, ++ .may_create = ext3_may_create, ++ .may_delete = ext3_may_delete, + }; +--- a/fs/ext3/namei.h ++++ b/fs/ext3/namei.h +@@ -5,4 +5,5 @@ + * + */ + ++extern int ext3_permission (struct inode *, int); + extern struct dentry *ext3_get_parent(struct dentry *child); +--- /dev/null ++++ b/fs/ext3/nfs4acl.c +@@ -0,0 +1,370 @@ ++/* ++ * Copyright (C) 2006 Andreas Gruenbacher ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2, or (at your option) any ++ * later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "namei.h" ++#include "xattr.h" ++#include "nfs4acl.h" ++ ++static inline struct nfs4acl * ++ext3_iget_nfs4acl(struct inode *inode) ++{ ++ struct nfs4acl *acl = EXT3_NFS4ACL_NOT_CACHED; ++ struct ext3_inode_info *ei = EXT3_I(inode); ++ ++ spin_lock(&inode->i_lock); ++ if (ei->i_nfs4acl != EXT3_NFS4ACL_NOT_CACHED) ++ acl = nfs4acl_get(ei->i_nfs4acl); ++ spin_unlock(&inode->i_lock); ++ ++ return acl; ++} ++ ++static inline void ++ext3_iset_nfs4acl(struct inode *inode, struct nfs4acl *acl) ++{ ++ struct ext3_inode_info *ei = EXT3_I(inode); ++ ++ spin_lock(&inode->i_lock); ++ if (ei->i_nfs4acl != EXT3_NFS4ACL_NOT_CACHED) ++ nfs4acl_put(ei->i_nfs4acl); ++ ei->i_nfs4acl = nfs4acl_get(acl); ++ spin_unlock(&inode->i_lock); ++} ++ ++static struct nfs4acl * ++ext3_get_nfs4acl(struct inode *inode) ++{ ++ const int name_index = EXT3_XATTR_INDEX_NFS4ACL; ++ void *value = NULL; ++ struct nfs4acl *acl; ++ int retval; ++ ++ if (!test_opt(inode->i_sb, NFS4ACL)) ++ return NULL; ++ ++ acl = ext3_iget_nfs4acl(inode); ++ if (acl != EXT3_NFS4ACL_NOT_CACHED) ++ return acl; ++ retval = ext3_xattr_get(inode, name_index, "", NULL, 0); ++ if (retval > 0) { ++ value = kmalloc(retval, GFP_KERNEL); ++ if (!value) ++ return ERR_PTR(-ENOMEM); ++ retval = ext3_xattr_get(inode, name_index, "", value, retval); ++ } ++ if (retval > 0) { ++ acl = nfs4acl_from_xattr(value, retval); ++ if (acl == ERR_PTR(-EINVAL)) ++ acl = ERR_PTR(-EIO); ++ } else if (retval == -ENODATA || retval == -ENOSYS) ++ acl = NULL; ++ else ++ acl = ERR_PTR(retval); ++ kfree(value); ++ ++ if (!IS_ERR(acl)) ++ ext3_iset_nfs4acl(inode, acl); ++ ++ return acl; ++} ++ ++static int ++ext3_set_nfs4acl(handle_t *handle, struct inode *inode, struct nfs4acl *acl) ++{ ++ const int name_index = EXT3_XATTR_INDEX_NFS4ACL; ++ size_t size = 0; ++ void *value = NULL; ++ int retval; ++ ++ if (acl) { ++ size = nfs4acl_xattr_size(acl); ++ value = kmalloc(size, GFP_KERNEL); ++ 
if (!value) ++ return -ENOMEM; ++ nfs4acl_to_xattr(acl, value); ++ } ++ if (handle) ++ retval = ext3_xattr_set_handle(handle, inode, name_index, "", ++ value, size, 0); ++ else ++ retval = ext3_xattr_set(inode, name_index, "", value, size, 0); ++ if (value) ++ kfree(value); ++ if (!retval) ++ ext3_iset_nfs4acl(inode, acl); ++ ++ return retval; ++} ++ ++int ++ext3_nfs4acl_permission(struct inode *inode, unsigned int mask) ++{ ++ struct nfs4acl *acl; ++ int retval; ++ ++ BUG_ON(!test_opt(inode->i_sb, NFS4ACL)); ++ ++ acl = ext3_get_nfs4acl(inode); ++ if (!acl) ++ retval = nfs4acl_generic_permission(inode, mask); ++ else if (IS_ERR(acl)) ++ retval = PTR_ERR(acl); ++ else { ++ retval = nfs4acl_permission(inode, acl, mask); ++ nfs4acl_put(acl); ++ } ++ ++ return retval; ++} ++ ++int ext3_may_create(struct inode *dir, int isdir) ++{ ++ int error; ++ ++ if (test_opt(dir->i_sb, NFS4ACL)) { ++ unsigned int mask = (isdir ? ACE4_ADD_SUBDIRECTORY : ACE4_ADD_FILE) | ++ ACE4_EXECUTE; ++ ++ error = ext3_nfs4acl_permission(dir, mask); ++ } else ++ error = ext3_permission(dir, MAY_WRITE | MAY_EXEC); ++ ++ return error; ++} ++ ++static int check_sticky(struct inode *dir, struct inode *inode) ++{ ++ if (!(dir->i_mode & S_ISVTX)) ++ return 0; ++ if (inode->i_uid == current->fsuid) ++ return 0; ++ if (dir->i_uid == current->fsuid) ++ return 0; ++ return !capable(CAP_FOWNER); ++} ++ ++int ext3_may_delete(struct inode *dir, struct inode *inode) ++{ ++ int error; ++ ++ if (test_opt(inode->i_sb, NFS4ACL)) { ++ error = ext3_nfs4acl_permission(dir, ACE4_DELETE_CHILD | ACE4_EXECUTE); ++ if (!error && check_sticky(dir, inode)) ++ error = -EPERM; ++ if (error && !ext3_nfs4acl_permission(inode, ACE4_DELETE)) ++ error = 0; ++ } else { ++ error = ext3_permission(dir, MAY_WRITE | MAY_EXEC); ++ if (!error && check_sticky(dir, inode)) ++ error = -EPERM; ++ } ++ ++ return error; ++} ++ ++int ++ext3_nfs4acl_init(handle_t *handle, struct inode *inode, struct inode *dir) ++{ ++ struct nfs4acl *dir_acl = 
NULL, *acl; ++ int retval; ++ ++ if (!S_ISLNK(inode->i_mode)) ++ dir_acl = ext3_get_nfs4acl(dir); ++ if (!dir_acl || IS_ERR(dir_acl)) { ++ inode->i_mode &= ~current->fs->umask; ++ return PTR_ERR(dir_acl); ++ } ++ acl = nfs4acl_inherit(dir_acl, inode->i_mode); ++ nfs4acl_put(dir_acl); ++ ++ retval = PTR_ERR(acl); ++ if (acl && !IS_ERR(acl)) { ++ retval = ext3_set_nfs4acl(handle, inode, acl); ++ inode->i_mode = (inode->i_mode & ~S_IRWXUGO) | ++ nfs4acl_masks_to_mode(acl); ++ nfs4acl_put(acl); ++ } ++ return retval; ++} ++ ++int ++ext3_nfs4acl_chmod(struct inode *inode) ++{ ++ struct nfs4acl *acl; ++ int retval; ++ ++ if (S_ISLNK(inode->i_mode)) ++ return -EOPNOTSUPP; ++ acl = ext3_get_nfs4acl(inode); ++ if (!acl || IS_ERR(acl)) ++ return PTR_ERR(acl); ++ acl = nfs4acl_chmod(acl, inode->i_mode); ++ if (IS_ERR(acl)) ++ return PTR_ERR(acl); ++ retval = ext3_set_nfs4acl(NULL, inode, acl); ++ nfs4acl_put(acl); ++ ++ return retval; ++} ++ ++static size_t ++ext3_xattr_list_nfs4acl(struct inode *inode, char *list, size_t list_len, ++ const char *name, size_t name_len) ++{ ++ const size_t size = sizeof(NFS4ACL_XATTR); ++ ++ if (!test_opt(inode->i_sb, NFS4ACL)) ++ return 0; ++ if (list && size <= list_len) ++ memcpy(list, NFS4ACL_XATTR, size); ++ return size; ++} ++ ++static int ++ext3_xattr_get_nfs4acl(struct inode *inode, const char *name, void *buffer, ++ size_t buffer_size) ++{ ++ struct nfs4acl *acl; ++ size_t size; ++ ++ if (!test_opt(inode->i_sb, NFS4ACL)) ++ return -EOPNOTSUPP; ++ if (strcmp(name, "") != 0) ++ return -EINVAL; ++ ++ acl = ext3_get_nfs4acl(inode); ++ if (IS_ERR(acl)) ++ return PTR_ERR(acl); ++ if (acl == NULL) ++ return -ENODATA; ++ size = nfs4acl_xattr_size(acl); ++ if (buffer) { ++ if (size > buffer_size) ++ return -ERANGE; ++ nfs4acl_to_xattr(acl, buffer); ++ } ++ nfs4acl_put(acl); ++ ++ return size; ++} ++ ++#ifdef NFS4ACL_DEBUG ++static size_t ++ext3_xattr_list_masked_nfs4acl(struct inode *inode, char *list, size_t list_len, ++ const char *name, 
size_t name_len) ++{ ++ return 0; ++} ++ ++static int ++ext3_xattr_get_masked_nfs4acl(struct inode *inode, const char *name, ++ void *buffer, size_t buffer_size) ++{ ++ const int name_index = EXT3_XATTR_INDEX_NFS4ACL; ++ struct nfs4acl *acl; ++ void *xattr; ++ size_t size; ++ int retval; ++ ++ if (!test_opt(inode->i_sb, NFS4ACL)) ++ return -EOPNOTSUPP; ++ if (strcmp(name, "") != 0) ++ return -EINVAL; ++ retval = ext3_xattr_get(inode, name_index, "", NULL, 0); ++ if (retval <= 0) ++ return retval; ++ xattr = kmalloc(retval, GFP_KERNEL); ++ if (!xattr) ++ return -ENOMEM; ++ retval = ext3_xattr_get(inode, name_index, "", xattr, retval); ++ if (retval <= 0) ++ return retval; ++ acl = nfs4acl_from_xattr(xattr, retval); ++ kfree(xattr); ++ if (IS_ERR(acl)) ++ return PTR_ERR(acl); ++ retval = nfs4acl_apply_masks(&acl); ++ if (retval) { ++ nfs4acl_put(acl); ++ return retval; ++ } ++ size = nfs4acl_xattr_size(acl); ++ if (buffer) { ++ if (size > buffer_size) ++ return -ERANGE; ++ nfs4acl_to_xattr(acl, buffer); ++ } ++ nfs4acl_put(acl); ++ return size; ++} ++#endif ++ ++static int ++ext3_xattr_set_nfs4acl(struct inode *inode, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ handle_t *handle; ++ struct nfs4acl *acl = NULL; ++ int retval, retries = 0; ++ ++ if (S_ISLNK(inode->i_mode) || !test_opt(inode->i_sb, NFS4ACL)) ++ return -EOPNOTSUPP; ++ if (strcmp(name, "") != 0) ++ return -EINVAL; ++ if (current->fsuid != inode->i_uid && ++ ext3_nfs4acl_permission(inode, ACE4_WRITE_ACL) && ++ !capable(CAP_FOWNER)) ++ return -EPERM; ++ if (value) { ++ acl = nfs4acl_from_xattr(value, size); ++ if (IS_ERR(acl)) ++ return PTR_ERR(acl); ++ ++ inode->i_mode &= ~S_IRWXUGO; ++ inode->i_mode |= nfs4acl_masks_to_mode(acl); ++ } ++ ++retry: ++ handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ext3_mark_inode_dirty(handle, inode); ++ retval = ext3_set_nfs4acl(handle, inode, acl); ++ 
ext3_journal_stop(handle); ++ if (retval == ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) ++ goto retry; ++ nfs4acl_put(acl); ++ return retval; ++} ++ ++struct xattr_handler ext3_nfs4acl_xattr_handler = { ++ .prefix = NFS4ACL_XATTR, ++ .list = ext3_xattr_list_nfs4acl, ++ .get = ext3_xattr_get_nfs4acl, ++ .set = ext3_xattr_set_nfs4acl, ++}; ++ ++#ifdef NFS4ACL_DEBUG ++struct xattr_handler ext3_masked_nfs4acl_xattr_handler = { ++ .prefix = "system.masked-nfs4acl", ++ .list = ext3_xattr_list_masked_nfs4acl, ++ .get = ext3_xattr_get_masked_nfs4acl, ++ .set = ext3_xattr_set_nfs4acl, ++}; ++#endif +--- /dev/null ++++ b/fs/ext3/nfs4acl.h +@@ -0,0 +1,36 @@ ++#ifndef __FS_EXT3_NFS4ACL_H ++#define __FS_EXT3_NFS4ACL_H ++ ++#ifdef CONFIG_EXT3_FS_NFS4ACL ++ ++#include ++ ++/* Value for i_nfs4acl if NFS4ACL has not been cached */ ++#define EXT3_NFS4ACL_NOT_CACHED ((void *)-1) ++ ++extern int ext3_nfs4acl_permission(struct inode *, unsigned int); ++extern int ext3_may_create(struct inode *, int); ++extern int ext3_may_delete(struct inode *, struct inode *); ++extern int ext3_nfs4acl_init(handle_t *, struct inode *, struct inode *); ++extern int ext3_nfs4acl_chmod(struct inode *); ++ ++#else /* CONFIG_FS_EXT3_NFS4ACL */ ++ ++#define ext3_may_create NULL ++#define ext3_may_delete NULL ++ ++static inline int ++ext3_nfs4acl_init(handle_t *handle, struct inode *inode, struct inode *dir) ++{ ++ return 0; ++} ++ ++static inline int ++ext3_nfs4acl_chmod(struct inode *inode) ++{ ++ return 0; ++} ++ ++#endif /* CONFIG_FS_EXT3_NFS4ACL */ ++ ++#endif /* __FS_EXT3_NFS4ACL_H */ +--- a/fs/ext3/super.c ++++ b/fs/ext3/super.c +@@ -36,12 +36,14 @@ + #include + #include + #include ++#include + #include + + #include + + #include "xattr.h" + #include "acl.h" ++#include "nfs4acl.h" + #include "namei.h" + + static int ext3_load_journal(struct super_block *, struct ext3_super_block *, +@@ -454,6 +456,9 @@ static struct inode *ext3_alloc_inode(st + ei->i_acl = EXT3_ACL_NOT_CACHED; + 
ei->i_default_acl = EXT3_ACL_NOT_CACHED; + #endif ++#ifdef CONFIG_EXT3_FS_NFS4ACL ++ ei->i_nfs4acl = EXT3_NFS4ACL_NOT_CACHED; ++#endif + ei->i_block_alloc_info = NULL; + ei->vfs_inode.i_version = 1; + return &ei->vfs_inode; +@@ -516,6 +521,13 @@ static void ext3_clear_inode(struct inod + EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED; + } + #endif ++#ifdef CONFIG_EXT3_FS_NFS4ACL ++ if (EXT3_I(inode)->i_nfs4acl && ++ EXT3_I(inode)->i_nfs4acl != EXT3_NFS4ACL_NOT_CACHED) { ++ nfs4acl_put(EXT3_I(inode)->i_nfs4acl); ++ EXT3_I(inode)->i_nfs4acl = EXT3_NFS4ACL_NOT_CACHED; ++ } ++#endif + ext3_discard_reservation(inode); + EXT3_I(inode)->i_block_alloc_info = NULL; + if (unlikely(rsv)) +@@ -750,7 +762,7 @@ enum { + Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, + Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, + Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, +- Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, ++ Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_acl_flavor, Opt_noacl, + Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, + Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, + Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, +@@ -782,6 +794,7 @@ static match_table_t tokens = { + {Opt_user_xattr, "user_xattr"}, + {Opt_nouser_xattr, "nouser_xattr"}, + {Opt_acl, "acl"}, ++ {Opt_acl_flavor, "acl=%s"}, + {Opt_noacl, "noacl"}, + {Opt_reservation, "reservation"}, + {Opt_noreservation, "noreservation"}, +@@ -925,19 +938,33 @@ static int parse_options (char *options, + printk("EXT3 (no)user_xattr options not supported\n"); + break; + #endif +-#ifdef CONFIG_EXT3_FS_POSIX_ACL + case Opt_acl: +- set_opt(sbi->s_mount_opt, POSIX_ACL); ++ args[0].to = args[0].from; ++ /* fall through */ ++ case Opt_acl_flavor: ++#ifdef CONFIG_EXT3_FS_POSIX_ACL ++ if (match_string(&args[0], "") || ++ match_string(&args[0], "posix")) { ++ set_opt(sbi->s_mount_opt, POSIX_ACL); ++ 
clear_opt(sbi->s_mount_opt, NFS4ACL); ++ } else ++#endif ++#ifdef CONFIG_EXT3_FS_NFS4ACL ++ if (match_string(&args[0], "nfs4")) { ++ clear_opt(sbi->s_mount_opt, POSIX_ACL); ++ set_opt(sbi->s_mount_opt, NFS4ACL); ++ } else ++#endif ++ { ++ printk(KERN_ERR "EXT3-fs: unsupported acl " ++ "flavor\n"); ++ return 0; ++ } + break; + case Opt_noacl: + clear_opt(sbi->s_mount_opt, POSIX_ACL); ++ clear_opt(sbi->s_mount_opt, NFS4ACL); + break; +-#else +- case Opt_acl: +- case Opt_noacl: +- printk("EXT3 (no)acl options not supported\n"); +- break; +-#endif + case Opt_reservation: + set_opt(sbi->s_mount_opt, RESERVATION); + break; +@@ -1607,8 +1634,11 @@ static int ext3_fill_super (struct super + NULL, 0)) + goto failed_mount; + +- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | +- ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); ++ sb->s_flags = (sb->s_flags & ~MS_POSIXACL); ++ if (sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ++ sb->s_flags |= MS_POSIXACL; ++ if (sbi->s_mount_opt & EXT3_MOUNT_NFS4ACL) ++ sb->s_flags |= MS_POSIXACL | MS_WITHAPPEND; + + if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV && + (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || +@@ -2451,8 +2481,12 @@ static int ext3_remount (struct super_bl + if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) + ext3_abort(sb, __func__, "Abort forced by user"); + +- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | +- ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); ++ sb->s_flags = (sb->s_flags & ~MS_POSIXACL); ++ if (sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ++ sb->s_flags |= MS_POSIXACL; ++ if (sbi->s_mount_opt & EXT3_MOUNT_NFS4ACL) ++ sb->s_flags |= MS_POSIXACL; ++ + + es = sbi->s_es; + +--- a/fs/ext3/xattr.c ++++ b/fs/ext3/xattr.c +@@ -114,6 +114,9 @@ static struct xattr_handler *ext3_xattr_ + #ifdef CONFIG_EXT3_FS_SECURITY + [EXT3_XATTR_INDEX_SECURITY] = &ext3_xattr_security_handler, + #endif ++#ifdef CONFIG_EXT3_FS_NFS4ACL ++ [EXT3_XATTR_INDEX_NFS4ACL] = &ext3_nfs4acl_xattr_handler, ++#endif + }; + + struct xattr_handler *ext3_xattr_handlers[] = { +@@ -126,6 +129,12 @@ struct xattr_handler *ext3_xattr_handler + #ifdef CONFIG_EXT3_FS_SECURITY + &ext3_xattr_security_handler, + #endif ++#ifdef CONFIG_EXT3_FS_NFS4ACL ++ &ext3_nfs4acl_xattr_handler, ++#ifdef NFS4ACL_DEBUG ++ &ext3_masked_nfs4acl_xattr_handler, ++#endif ++#endif + NULL + }; + +--- a/fs/ext3/xattr.h ++++ b/fs/ext3/xattr.h +@@ -21,6 +21,7 @@ + #define EXT3_XATTR_INDEX_TRUSTED 4 + #define EXT3_XATTR_INDEX_LUSTRE 5 + #define EXT3_XATTR_INDEX_SECURITY 6 ++#define EXT3_XATTR_INDEX_NFS4ACL 7 + + struct ext3_xattr_header { + __le32 h_magic; /* magic number for identification */ +@@ -63,6 +64,10 @@ extern struct xattr_handler ext3_xattr_t + extern struct xattr_handler ext3_xattr_acl_access_handler; + extern struct xattr_handler ext3_xattr_acl_default_handler; + extern struct xattr_handler ext3_xattr_security_handler; ++extern struct xattr_handler ext3_nfs4acl_xattr_handler; ++#ifdef NFS4ACL_DEBUG ++extern struct xattr_handler ext3_masked_nfs4acl_xattr_handler; ++#endif + + extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); + +--- a/include/linux/ext3_fs.h ++++ b/include/linux/ext3_fs.h +@@ -380,6 +380,7 @@ struct ext3_inode { + #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ + #define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ + #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ ++#define EXT3_MOUNT_NFS4ACL 
0x400000 /* NFS version 4 ACLs */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef _LINUX_EXT2_FS_H +--- a/include/linux/ext3_fs_i.h ++++ b/include/linux/ext3_fs_i.h +@@ -107,6 +107,9 @@ struct ext3_inode_info { + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; + #endif ++#ifdef CONFIG_EXT3_FS_NFS4ACL ++ struct nfs4acl *i_nfs4acl; ++#endif + + struct list_head i_orphan; /* unlinked but open inodes */ + diff --git a/src/patches/suse-2.6.27.25/patches.suse/nfsacl-client-cache-CHECK.diff b/src/patches/suse-2.6.27.25/patches.suse/nfsacl-client-cache-CHECK.diff new file mode 100644 index 0000000000..add3eadd52 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/nfsacl-client-cache-CHECK.diff @@ -0,0 +1,76 @@ +From: Andreas Gruenbacher +Subject: nfsacl: improve cache consistency + +(This one is currently disabled.) + +Index: linux-2.6.11-rc2/fs/nfs/inode.c +=================================================================== +--- linux-2.6.11-rc2.orig/fs/nfs/inode.c ++++ linux-2.6.11-rc2/fs/nfs/inode.c +@@ -65,13 +65,8 @@ static int nfs_statfs(struct super_bloc + static int nfs_show_options(struct seq_file *, struct vfsmount *); + + #ifdef CONFIG_NFS_ACL +-static void nfs_forget_cached_acls(struct inode *); + static void __nfs_forget_cached_acls(struct nfs_inode *nfsi); + #else +-static inline void nfs_forget_cached_acls(struct inode *inode) +-{ +-} +- + static inline void __nfs_forget_cached_acls(struct nfs_inode *nfsi) + { + } +@@ -1188,7 +1183,7 @@ static void __nfs_forget_cached_acls(str + #endif /* CONFIG_NFS_ACL */ + + #ifdef CONFIG_NFS_ACL +-static void nfs_forget_cached_acls(struct inode *inode) ++void nfs_forget_cached_acls(struct inode *inode) + { + dprintk("NFS: nfs_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id, + inode->i_ino); +@@ -1293,6 +1288,8 @@ int nfs_refresh_inode(struct inode *inod + if ((fattr->valid & NFS_ATTR_WCC) != 0) { + if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) + 
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); ++ else ++ nfs_forget_cached_acls(inode); + if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + } +Index: linux-2.6.11-rc2/fs/nfs/nfs3proc.c +=================================================================== +--- linux-2.6.11-rc2.orig/fs/nfs/nfs3proc.c ++++ linux-2.6.11-rc2/fs/nfs/nfs3proc.c +@@ -876,7 +876,11 @@ nfs3_proc_setacls(struct inode *inode, s + acl = NULL; + } + } +- nfs_cache_acls(inode, acl, dfacl); ++ if ((fattr.valid & NFS_ATTR_WCC) && ++ timespec_equal(&inode->i_ctime, &fattr.pre_ctime)) ++ nfs_cache_acls(inode, acl, dfacl); ++ else ++ nfs_forget_cached_acls(inode); + status = nfs_refresh_inode(inode, &fattr); + } + +Index: linux-2.6.11-rc2/include/linux/nfs_fs.h +=================================================================== +--- linux-2.6.11-rc2.orig/include/linux/nfs_fs.h ++++ linux-2.6.11-rc2/include/linux/nfs_fs.h +@@ -293,6 +293,13 @@ extern struct inode *nfs_fhget(struct su + struct nfs_fattr *); + extern struct posix_acl *nfs_get_cached_acl(struct inode *, int); + extern void nfs_cache_acls(struct inode *, struct posix_acl *, struct posix_acl *); ++#ifdef CONFIG_NFS_ACL ++void nfs_forget_cached_acls(struct inode *); ++#else ++static inline void nfs_forget_cached_acls(struct inode *inode) ++{ ++} ++#endif + extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); + extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); + extern int nfs_permission(struct inode *, int, struct nameidata *); diff --git a/src/patches/suse-2.6.27.25/patches.suse/novfs-map-drives-correctly.diff b/src/patches/suse-2.6.27.25/patches.suse/novfs-map-drives-correctly.diff new file mode 100644 index 0000000000..44738a3b64 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/novfs-map-drives-correctly.diff @@ -0,0 +1,78 @@ +From: Goldwyn Rodrigues +Subject: Fix oops in set_map_drive +References: 
bnc#446824, bnc#444469 + +The oops was caused because of an unconditional free because of the +merge changes. +The error was caused because novfs_set_map_drive was not being called +with the right args, which caused it to request for incorrect memory +size. +Cleaned up some debug messages as well, and corrected debug messages. + + +--- + fs/novfs/daemon.c | 9 +++++---- + fs/novfs/inode.c | 11 ----------- + 2 files changed, 5 insertions(+), 15 deletions(-) + +Index: linux-2.6.27/fs/novfs/daemon.c +=================================================================== +--- linux-2.6.27.orig/fs/novfs/daemon.c 2008-12-02 14:57:29.000000000 +0530 ++++ linux-2.6.27/fs/novfs/daemon.c 2008-12-02 15:03:15.000000000 +0530 +@@ -1936,7 +1936,7 @@ static int set_map_drive(struct novfs_xp + full_name_hash(drivemap->name, + symInfo.linkOffsetLength - 1); + drivemap->namelen = symInfo.linkOffsetLength - 1; +- DbgPrint("NwdSetMapDrive: hash=0x%x path=%s\n", ++ DbgPrint("set_map_drive: hash=0x%lx path=%s\n", + drivemap->hash, drivemap->name); + + dm = (struct drive_map *) & DriveMapList.next; +@@ -1945,8 +1945,8 @@ static int set_map_drive(struct novfs_xp + + list_for_each(list, &DriveMapList) { + dm = list_entry(list, struct drive_map, list); +- DbgPrint("NwdSetMapDrive: dm=0x%p\n" +- " hash: 0x%x\n" ++ DbgPrint("set_map_drive: dm=0x%p\n" ++ " hash: 0x%lx\n" + " namelen: %d\n" + " name: %s\n", + dm, dm->hash, dm->namelen, dm->name); +@@ -1971,7 +1971,8 @@ static int set_map_drive(struct novfs_xp + &dm->list); + } + } +- kfree(drivemap); ++ else ++ kfree(drivemap); + up(&DriveMapLock); + return (retVal); + } +Index: linux-2.6.27/fs/novfs/inode.c +=================================================================== +--- linux-2.6.27.orig/fs/novfs/inode.c 2008-12-02 15:01:46.000000000 +0530 ++++ linux-2.6.27/fs/novfs/inode.c 2008-12-02 15:01:52.000000000 +0530 +@@ -4055,22 +4055,11 @@ int __init init_novfs(void) + + void __exit exit_novfs(void) + { +- printk(KERN_INFO "exit_novfs\n"); +- 
+ novfs_scope_exit(); +- printk(KERN_INFO "exit_novfs after Scope_Uninit\n"); +- + novfs_daemon_queue_exit(); +- printk(KERN_INFO "exit_novfs after Uninit_Daemon_Queue\n"); +- + novfs_profile_exit(); +- printk(KERN_INFO "exit_novfs after profile_exit\n"); +- + novfs_proc_exit(); +- printk(KERN_INFO "exit_novfs Uninit_Procfs_Interface\n"); +- + unregister_filesystem(&novfs_fs_type); +- printk(KERN_INFO "exit_novfs: Exit\n"); + + if (novfs_current_mnt) { + kfree(novfs_current_mnt); diff --git a/src/patches/suse-2.6.27.25/patches.suse/novfs-merge-changes.diff b/src/patches/suse-2.6.27.25/patches.suse/novfs-merge-changes.diff new file mode 100644 index 0000000000..fddf2cfc67 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/novfs-merge-changes.diff @@ -0,0 +1,333 @@ +From: Goldwyn Rodrigues flags = flags; + cmd->pathLen = pathlen; +- memcpy(cmd->data, Path, cmd->pathLen + 1); //+ '\0' ++ memcpy(cmd->data, Path, cmd->pathLen); + + cmd->nameLen = namelen; + memcpy(cmd->data + cmd->pathLen + 1, Name, cmd->nameLen + 1); +@@ -601,7 +601,7 @@ static int begin_directory_enumerate(uns + return (retCode); + } + +-static int end_directory_enumerate(void *EnumHandle, struct novfs_schandle SessionId) ++int novfs_end_directory_enumerate(void *EnumHandle, struct novfs_schandle SessionId) + { + struct novfs_end_enumerate_directory_request cmd; + struct novfs_end_enumerate_directory_reply *reply = NULL; +@@ -793,11 +793,9 @@ int novfs_get_dir_listex(unsigned char * + directory_enumerate_ex(EnumHandle, SessionId, Count, Info, + INTERRUPTIBLE); + if (retCode) { +- end_directory_enumerate(*EnumHandle, SessionId); +- if (-1 == retCode) { +- retCode = 0; +- *EnumHandle = Uint32toHandle(-1); +- } ++ novfs_end_directory_enumerate(*EnumHandle, SessionId); ++ retCode = 0; ++ *EnumHandle = Uint32toHandle(-1); + } + } + return (retCode); +@@ -915,32 +913,33 @@ int novfs_create(unsigned char * Path, i + + cmdlen = offsetof(struct novfs_create_file_request, path) + pathlen; + cmd = 
kmalloc(cmdlen, GFP_KERNEL); +- if (cmd) { +- cmd->Command.CommandType = VFS_COMMAND_CREATE_FILE; +- if (DirectoryFlag) { +- cmd->Command.CommandType = VFS_COMMAND_CREATE_DIRECOTRY; +- } +- cmd->Command.SequenceNumber = 0; +- cmd->Command.SessionId = SessionId; ++ if (!cmd) ++ return -ENOMEM; ++ cmd->Command.CommandType = VFS_COMMAND_CREATE_FILE; ++ if (DirectoryFlag) { ++ cmd->Command.CommandType = VFS_COMMAND_CREATE_DIRECOTRY; ++ } ++ cmd->Command.SequenceNumber = 0; ++ cmd->Command.SessionId = SessionId; + +- cmd->pathlength = pathlen; +- memcpy(cmd->path, Path, pathlen); ++ cmd->pathlength = pathlen; ++ memcpy(cmd->path, Path, pathlen); + +- retCode = +- Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply, +- &replylen, INTERRUPTIBLE); ++ retCode = ++ Queue_Daemon_Command(cmd, cmdlen, NULL, 0, (void *)&reply, ++ &replylen, INTERRUPTIBLE); ++ ++ if (reply) { ++ retCode = 0; ++ if (reply->Reply.ErrorCode) { ++ retCode = -EIO; ++ if (reply->Reply.ErrorCode == NWE_ACCESS_DENIED) ++ retCode = -EACCES; + +- if (reply) { +- retCode = 0; +- if (reply->Reply.ErrorCode) { +- retCode = -EIO; +- } +- kfree(reply); + } +- kfree(cmd); +- } else { +- retCode = -ENOMEM; ++ kfree(reply); + } ++ kfree(cmd); + return (retCode); + } + +Index: linux-2.6.27/fs/novfs/inode.c +=================================================================== +--- linux-2.6.27.orig/fs/novfs/inode.c 2008-12-02 11:57:24.000000000 +0530 ++++ linux-2.6.27/fs/novfs/inode.c 2008-12-02 11:58:35.000000000 +0530 +@@ -137,11 +137,11 @@ int novfs_i_revalidate(struct dentry *de + * Extended attributes operations + */ + +-int novfs_i_getxattr(struct dentry *dentry, const char *name, void *buffer, ++ssize_t novfs_i_getxattr(struct dentry *dentry, const char *name, void *buffer, + size_t size); + int novfs_i_setxattr(struct dentry *dentry, const char *name, const void *value, + size_t value_size, int flags); +-int novfs_i_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); ++ssize_t 
novfs_i_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); + + void update_inode(struct inode *Inode, struct novfs_entry_info *Info); + +@@ -262,21 +262,17 @@ static struct inode_operations novfs_ino + .rename = novfs_i_rename, + .setattr = novfs_i_setattr, + .getattr = novfs_i_getattr, +-/* + .getxattr = novfs_i_getxattr, + .setxattr = novfs_i_setxattr, + .listxattr = novfs_i_listxattr, +-*/ + }; + + static struct inode_operations novfs_file_inode_operations = { + .setattr = novfs_i_setattr, + .getattr = novfs_i_getattr, +-/* + .getxattr = novfs_i_getxattr, + .setxattr = novfs_i_setxattr, + .listxattr = novfs_i_listxattr, +-*/ + }; + + static struct super_operations novfs_ops = { +@@ -935,14 +931,23 @@ int novfs_dir_open(struct inode *dir, st + + int novfs_dir_release(struct inode *dir, struct file *file) + { +- struct file_private *file_private; +- file_private = (struct file_private *) file->private_data; ++ struct file_private *file_private = file->private_data; ++ struct inode *inode = file->f_dentry->d_inode; ++ struct novfs_schandle sessionId; + + DbgPrint("novfs_dir_release: Inode 0x%p %d Name %.*s\n", dir, + dir->i_ino, file->f_dentry->d_name.len, + file->f_dentry->d_name.name); + + if (file_private) { ++ if (file_private->enumHandle && (file_private->enumHandle != ((void *)-1))) { ++ sessionId = novfs_scope_get_sessionId(((struct inode_data *)inode->i_private)->Scope); ++ if (SC_PRESENT(sessionId) == 0) { ++ ((struct inode_data *)inode->i_private)->Scope = novfs_get_scope(file->f_dentry); ++ sessionId = novfs_scope_get_sessionId(((struct inode_data *)inode->i_private)->Scope); ++ } ++ novfs_end_directory_enumerate(file_private->enumHandle, sessionId); ++ } + kfree(file_private); + file->private_data = NULL; + } +@@ -966,6 +971,16 @@ loff_t novfs_dir_lseek(struct file * fil + + file_private = (struct file_private *) file->private_data; + file_private->listedall = 0; ++ if (file_private->enumHandle && (file_private->enumHandle != ((void 
*)-1))) { ++ struct novfs_schandle sessionId; ++ struct inode *inode = file->f_dentry->d_inode; ++ sessionId = novfs_scope_get_sessionId(((struct inode_data *)inode->i_private)->Scope); ++ if (SC_PRESENT(sessionId) == 0) { ++ ((struct inode_data *)inode->i_private)->Scope = novfs_get_scope(file->f_dentry); ++ sessionId = novfs_scope_get_sessionId(((struct inode_data *)inode->i_private)->Scope); ++ } ++ novfs_end_directory_enumerate(file_private->enumHandle, sessionId); ++ } + file_private->enumHandle = NULL; + + return 0; +@@ -2864,9 +2879,15 @@ int novfs_i_unlink(struct inode *dir, st + } else { + retCode = + novfs_delete(path, +- S_ISDIR(inode-> +- i_mode), +- session); ++ S_ISDIR(inode->i_mode), session); ++ if (retCode) { ++ struct iattr ia; ++ memset(&ia, 0, sizeof(ia)); ++ ia.ia_valid = ATTR_MODE; ++ ia.ia_mode = S_IRWXU; ++ novfs_set_attr(path, &ia, session); ++ retCode = novfs_delete(path, S_ISDIR(inode->i_mode), session); ++ } + } + if (!retCode || IS_DEADDIR(inode)) { + novfs_remove_inode_entry(dir, +@@ -3119,13 +3140,16 @@ int novfs_i_rename(struct inode *odir, s + } + + retCode = +- novfs_delete +- (newpath, +- S_ISDIR +- (nd-> +- d_inode-> +- i_mode), +- session); ++ novfs_delete(newpath, S_ISDIR(nd->d_inode->i_mode), session); ++ if (retCode) { ++ struct iattr ia; ++ memset(&ia, 0, sizeof(ia)); ++ ia.ia_valid = ATTR_MODE; ++ ia.ia_mode = S_IRWXU; ++ novfs_set_attr(newpath, &ia, session); ++ retCode = novfs_delete(newpath, S_ISDIR(nd->d_inode->i_mode), session); ++ } ++ + } + + session = novfs_scope_get_sessionId(((struct inode_data *) ndir->i_private)->Scope); +@@ -3378,7 +3402,7 @@ int novfs_i_getattr(struct vfsmount *mnt + return (retCode); + } + +-int novfs_i_getxattr(struct dentry *dentry, const char *name, void *buffer, ++ssize_t novfs_i_getxattr(struct dentry *dentry, const char *name, void *buffer, + size_t buffer_size) + { + struct inode *inode = dentry->d_inode; +@@ -3528,7 +3552,7 @@ int novfs_i_setxattr(struct dentry *dent + return 
(retError); + } + +-int novfs_i_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) ++ssize_t novfs_i_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) + { + struct inode *inode = dentry->d_inode; + struct novfs_schandle sessionId; +@@ -3720,6 +3744,9 @@ int novfs_statfs(struct dentry *de, stru + DbgPrint("fd=%llu\n", fd); + DbgPrint("te=%llu\n", te); + DbgPrint("fe=%llu\n", fd); ++ /* fix for Nautilus */ ++ if (sb->s_blocksize == 0) ++ sb->s_blocksize = 4096; + + buf->f_type = sb->s_magic; + buf->f_bsize = sb->s_blocksize; +@@ -3762,7 +3789,6 @@ struct inode *novfs_get_inode(struct sup + inode->i_mode = mode; + inode->i_uid = Uid; + inode->i_gid = 0; +- inode->i_sb->s_blocksize = sb->s_blocksize; + inode->i_blkbits = sb->s_blocksize_bits; + inode->i_blocks = 0; + inode->i_rdev = 0; +@@ -3826,8 +3852,6 @@ struct inode *novfs_get_inode(struct sup + case S_IFDIR: + inode->i_op = &novfs_inode_operations; + inode->i_fop = &novfs_dir_operations; +- +- inode->i_sb->s_blocksize = 0; + inode->i_blkbits = 0; + break; + +@@ -3957,6 +3981,7 @@ static int novfs_get_sb(struct file_syst + + static void novfs_kill_sb(struct super_block *super) + { ++ shrink_dcache_sb(super); + kill_litter_super(super); + } + +Index: linux-2.6.27/fs/novfs/vfs.h +=================================================================== +--- linux-2.6.27.orig/fs/novfs/vfs.h 2008-12-02 11:57:24.000000000 +0530 ++++ linux-2.6.27/fs/novfs/vfs.h 2008-12-02 11:58:18.000000000 +0530 +@@ -344,7 +344,8 @@ extern int novfs_close_stream(void * Con + struct novfs_schandle SessionId); + + extern int novfs_add_to_root(char *); +- ++extern int novfs_end_directory_enumerate(void *EnumHandle, ++ struct novfs_schandle SessionId); + + /* + * scope.c functions +Index: linux-2.6.27/fs/novfs/nwcapi.c +=================================================================== +--- linux-2.6.27.orig/fs/novfs/nwcapi.c 2008-12-02 11:57:24.000000000 +0530 ++++ linux-2.6.27/fs/novfs/nwcapi.c 2008-12-02 
11:58:18.000000000 +0530 +@@ -1845,14 +1845,14 @@ int novfs_set_map_drive(struct novfs_xpl + + struct novfs_xplat_call_request *cmd; + struct novfs_xplat_call_reply *reply; +- unsigned long status = 0, datalen, cmdlen, replylen, cpylen; ++ unsigned long status = 0, datalen, cmdlen, replylen; + struct nwc_map_drive_ex symInfo; + + DbgPrint("Call to NwcSetMapDrive\n"); +- cpylen = copy_from_user(&symInfo, pdata->reqData, sizeof(symInfo)); + cmdlen = sizeof(*cmd); +- datalen = +- sizeof(symInfo) + symInfo.dirPathOffsetLength + ++ if (copy_from_user(&symInfo, pdata->reqData, sizeof(symInfo))) ++ return -EFAULT; ++ datalen = sizeof(symInfo) + symInfo.dirPathOffsetLength + + symInfo.linkOffsetLength; + + DbgPrint(" cmdlen = %d\n", cmdlen); +@@ -1876,7 +1876,10 @@ int novfs_set_map_drive(struct novfs_xpl + cmd->Command.SessionId = Session; + cmd->NwcCommand = NWC_MAP_DRIVE; + +- cpylen = copy_from_user(cmd->data, pdata->reqData, datalen); ++ if (copy_from_user(cmd->data, pdata->reqData, datalen)) { ++ kfree(cmd); ++ return -EFAULT; ++ } + status = + Queue_Daemon_Command((void *)cmd, cmdlen, NULL, 0, + (void **)&reply, &replylen, diff --git a/src/patches/suse-2.6.27.25/patches.suse/panic-on-io-nmi.diff b/src/patches/suse-2.6.27.25/patches.suse/panic-on-io-nmi.diff new file mode 100644 index 0000000000..e6043a2642 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/panic-on-io-nmi.diff @@ -0,0 +1,116 @@ +From: Kurt Garloff +Subject: [PATCH] X86: sysctl to allow panic on IOCK NMI error +References: bnc427979 + +This patch introduces a sysctl /proc/sys/kernel/panic_on_io_nmi., +which defaults to 0 (off). +When enabled, the kernel panics when the kernel receives an NMI +caused by an IO error. + +The IO error triggered NMI indicates a serious system condition, +which could result in IO data corruption. Rather than contiuing, +panicing and dumping might be a better choice, so one can figure +out what's causing the IO error. 
+This could be especially important to companies running IO intensive +applications where corruption must be avoided, e.g. a banks databases. + + +Signed-off-by: Roberto Angelino + + +--- + arch/x86/kernel/traps_32.c | 4 ++++ + arch/x86/kernel/traps_64.c | 4 ++++ + include/linux/kernel.h | 1 + + include/linux/sysctl.h | 1 + + kernel/sysctl.c | 8 ++++++++ + kernel/sysctl_check.c | 1 + + 6 files changed, 19 insertions(+) + +--- a/arch/x86/kernel/traps_32.c ++++ b/arch/x86/kernel/traps_32.c +@@ -83,6 +83,7 @@ gate_desc idt_table[256] + __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; + + int panic_on_unrecovered_nmi; ++int panic_on_io_nmi; + int kstack_depth_to_print = 24; + static unsigned int code_bytes = 64; + #ifdef CONFIG_STACK_UNWIND +@@ -779,6 +780,9 @@ io_check_error(unsigned char reason, str + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + show_registers(regs); + ++ if (panic_on_io_nmi) ++ panic("NMI IOCK error: Not continuing"); ++ + /* Re-enable the IOCK line, wait for a few seconds */ + reason = (reason & 0xf) | 8; + outb(reason, 0x61); +--- a/arch/x86/kernel/traps_64.c ++++ b/arch/x86/kernel/traps_64.c +@@ -56,6 +56,7 @@ + #include + + int panic_on_unrecovered_nmi; ++int panic_on_io_nmi; + int kstack_depth_to_print = 12; + static unsigned int code_bytes = 64; + #ifdef CONFIG_STACK_UNWIND +@@ -840,6 +841,9 @@ io_check_error(unsigned char reason, str + printk("NMI: IOCK error (debug interrupt?)\n"); + show_registers(regs); + ++ if (panic_on_io_nmi) ++ panic("NMI IOCK error: Not continuing"); ++ + /* Re-enable the IOCK line, wait for a few seconds */ + reason = (reason & 0xf) | 8; + outb(reason, 0x61); +--- a/include/linux/kernel.h ++++ b/include/linux/kernel.h +@@ -236,6 +236,7 @@ extern int oops_in_progress; /* If set, + extern int panic_timeout; + extern int panic_on_oops; + extern int panic_on_unrecovered_nmi; ++extern int panic_on_io_nmi; + extern int tainted; + extern int unsupported; + extern const char 
*print_tainted(void); +--- a/include/linux/sysctl.h ++++ b/include/linux/sysctl.h +@@ -165,6 +165,7 @@ enum + KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ + KERN_KDB=77, /* int: kdb on/off */ + KERN_DUMP_AFTER_NOTIFIER=78, /* int: kdump after panic_notifier (SUSE only) */ ++ KERN_PANIC_ON_IO_NMI=79, /* int: whether we will panic on an io NMI */ + }; + + +--- a/kernel/sysctl.c ++++ b/kernel/sysctl.c +@@ -691,6 +691,14 @@ static struct ctl_table kern_table[] = { + .proc_handler = &proc_dointvec, + }, + { ++ .ctl_name = KERN_PANIC_ON_IO_NMI, ++ .procname = "panic_on_io_nmi", ++ .data = &panic_on_io_nmi, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { + .ctl_name = KERN_BOOTLOADER_TYPE, + .procname = "bootloader_type", + .data = &bootloader_type, +--- a/kernel/sysctl_check.c ++++ b/kernel/sysctl_check.c +@@ -104,6 +104,7 @@ static const struct trans_ctl_table tran + { KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, + { KERN_NMI_WATCHDOG, "nmi_watchdog" }, + { KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, ++ { KERN_PANIC_ON_IO_NMI, "panic_on_io_nmi" }, + { KERN_SETUID_DUMPABLE, "suid_dumpable" }, + { KERN_KDB, "kdb" }, + { KERN_DUMP_AFTER_NOTIFIER, "dump_after_notifier" }, diff --git a/src/patches/suse-2.6.27.25/patches.suse/parser-match_string.diff b/src/patches/suse-2.6.27.25/patches.suse/parser-match_string.diff new file mode 100644 index 0000000000..912f25ddf7 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/parser-match_string.diff @@ -0,0 +1,55 @@ +From: Andreas Gruenbacher +Subject: Add match_string() for mount option parsing +References: FATE301275 +Patch-mainline: no + +The match_string() function allows to parse string constants in +mount options. 
+ +Signed-off-by: Andreas Gruenbacher + +--- + include/linux/parser.h | 1 + + lib/parser.c | 14 ++++++++++++++ + 2 files changed, 15 insertions(+) + +--- a/include/linux/parser.h ++++ b/include/linux/parser.h +@@ -26,6 +26,7 @@ typedef struct { + } substring_t; + + int match_token(char *, match_table_t table, substring_t args[]); ++int match_string(substring_t *s, const char *str); + int match_int(substring_t *, int *result); + int match_octal(substring_t *, int *result); + int match_hex(substring_t *, int *result); +--- a/lib/parser.c ++++ b/lib/parser.c +@@ -111,6 +111,19 @@ int match_token(char *s, match_table_t t + } + + /** ++ * match_string: check for a particular parameter ++ * @s: substring to be scanned ++ * @str: string to scan for ++ * ++ * Description: Return if a &substring_t is equal to string @str. ++ */ ++int match_string(substring_t *s, const char *str) ++{ ++ return strlen(str) == s->to - s->from && ++ !memcmp(str, s->from, s->to - s->from); ++} ++ ++/** + * match_number: scan a number in the given base from a substring_t + * @s: substring to be scanned + * @result: resulting integer on success +@@ -221,6 +234,7 @@ char *match_strdup(const substring_t *s) + } + + EXPORT_SYMBOL(match_token); ++EXPORT_SYMBOL(match_string); + EXPORT_SYMBOL(match_int); + EXPORT_SYMBOL(match_octal); + EXPORT_SYMBOL(match_hex); diff --git a/src/patches/suse-2.6.27.25/patches.suse/raw_device_max_minors_param.diff b/src/patches/suse-2.6.27.25/patches.suse/raw_device_max_minors_param.diff new file mode 100644 index 0000000000..2971d63f2c --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/raw_device_max_minors_param.diff @@ -0,0 +1,112 @@ +From: Jan Kara +Subject: Allow setting of number of raw devices as a module parameter +References: FATE 302178 +Patch-mainline: never + +Allow setting of maximal number of raw devices as a module parameter. This requires +changing of static array into a vmalloced one (the array is going to be too large +for kmalloc). 
+ +Signed-off-by: Jan Kara + +--- + drivers/char/Kconfig | 2 +- + drivers/char/raw.c | 33 +++++++++++++++++++++++++++------ + 2 files changed, 28 insertions(+), 7 deletions(-) + +--- a/drivers/char/Kconfig ++++ b/drivers/char/Kconfig +@@ -1026,7 +1026,7 @@ config RAW_DRIVER + with the O_DIRECT flag. + + config MAX_RAW_DEVS +- int "Maximum number of RAW devices to support (1-8192)" ++ int "Maximum number of RAW devices to support (1-65536)" + depends on RAW_DRIVER + default "256" + help +--- a/drivers/char/raw.c ++++ b/drivers/char/raw.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + #include + +@@ -29,10 +30,15 @@ struct raw_device_data { + }; + + static struct class *raw_class; +-static struct raw_device_data raw_devices[MAX_RAW_MINORS]; ++static struct raw_device_data *raw_devices; + static DEFINE_MUTEX(raw_mutex); + static const struct file_operations raw_ctl_fops; /* forward declaration */ + ++static int max_raw_minors = MAX_RAW_MINORS; ++ ++module_param(max_raw_minors, int, 0); ++MODULE_PARM_DESC(max_raw_minors, "Maximum number of raw devices (1-65536)"); ++ + /* + * Open/close code for raw IO. 
+ * +@@ -158,7 +164,7 @@ static int raw_ctl_ioctl(struct inode *i + goto out; + } + +- if (rq.raw_minor <= 0 || rq.raw_minor >= MAX_RAW_MINORS) { ++ if (rq.raw_minor <= 0 || rq.raw_minor >= max_raw_minors) { + err = -EINVAL; + goto out; + } +@@ -266,12 +272,26 @@ static int __init raw_init(void) + dev_t dev = MKDEV(RAW_MAJOR, 0); + int ret; + +- ret = register_chrdev_region(dev, MAX_RAW_MINORS, "raw"); ++ if (max_raw_minors < 1 || max_raw_minors > 65536) { ++ printk(KERN_WARNING "raw: invalid max_raw_minors (must be" ++ " between 1 and 65536), using %d\n", MAX_RAW_MINORS); ++ max_raw_minors = MAX_RAW_MINORS; ++ } ++ ++ raw_devices = vmalloc(sizeof(struct raw_device_data) * max_raw_minors); ++ if (!raw_devices) { ++ printk(KERN_ERR "Not enough memory for raw device structures\n"); ++ ret = -ENOMEM; ++ goto error; ++ } ++ memset(raw_devices, 0, sizeof(struct raw_device_data) * max_raw_minors); ++ ++ ret = register_chrdev_region(dev, max_raw_minors, "raw"); + if (ret) + goto error; + + cdev_init(&raw_cdev, &raw_fops); +- ret = cdev_add(&raw_cdev, dev, MAX_RAW_MINORS); ++ ret = cdev_add(&raw_cdev, dev, max_raw_minors); + if (ret) { + kobject_put(&raw_cdev.kobj); + goto error_region; +@@ -290,8 +310,9 @@ static int __init raw_init(void) + return 0; + + error_region: +- unregister_chrdev_region(dev, MAX_RAW_MINORS); ++ unregister_chrdev_region(dev, max_raw_minors); + error: ++ vfree(raw_devices); + return ret; + } + +@@ -300,7 +321,7 @@ static void __exit raw_exit(void) + device_destroy(raw_class, MKDEV(RAW_MAJOR, 0)); + class_destroy(raw_class); + cdev_del(&raw_cdev); +- unregister_chrdev_region(MKDEV(RAW_MAJOR, 0), MAX_RAW_MINORS); ++ unregister_chrdev_region(MKDEV(RAW_MAJOR, 0), max_raw_minors); + } + + module_init(raw_init); diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-add-reiserfs_error.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-add-reiserfs_error.diff new file mode 100644 index 0000000000..04f95cfd39 --- /dev/null +++ 
b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-add-reiserfs_error.diff @@ -0,0 +1,64 @@ +From: Jeff Mahoney +Subject: reiserfs: introduce reiserfs_error() + + Although reiserfs can currently handle severe errors such as journal failure, + it cannot handle less severe errors like metadata i/o failure. The following + patch adds a reiserfs_error() function akin to the one in ext3. + + Subsequent patches will use this new error handler to handle errors more + gracefully in general. + +Signed-off-by: Jeff Mahoney + +-- + fs/reiserfs/prints.c | 25 +++++++++++++++++++++++++ + include/linux/reiserfs_fs.h | 4 ++++ + 2 files changed, 29 insertions(+) + +--- a/fs/reiserfs/prints.c ++++ b/fs/reiserfs/prints.c +@@ -373,6 +373,31 @@ void __reiserfs_panic(struct super_block + id ? id : "", id ? " " : "", function, error_buf); + } + ++void __reiserfs_error(struct super_block *sb, const char *id, ++ const char *function, const char *fmt, ...) ++{ ++ do_reiserfs_warning(fmt); ++ ++ BUG_ON(sb == NULL); ++ ++ if (reiserfs_error_panic(sb)) ++ __reiserfs_panic(sb, id, function, error_buf); ++ ++ if (id && id[0]) ++ printk(KERN_CRIT "REISERFS error (device %s): %s %s: %s\n", ++ sb->s_id, id, function, error_buf); ++ else ++ printk(KERN_CRIT "REISERFS error (device %s): %s: %s\n", ++ sb->s_id, function, error_buf); ++ ++ if (sb->s_flags & MS_RDONLY) ++ return; ++ ++ reiserfs_info(sb, "Remounting filesystem read-only\n"); ++ sb->s_flags |= MS_RDONLY; ++ reiserfs_abort_journal(sb, -EIO); ++} ++ + void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) + { + do_reiserfs_warning(fmt); +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -2006,6 +2006,10 @@ void __reiserfs_panic(struct super_block + __attribute__ ((noreturn)); + #define reiserfs_panic(s, id, fmt, args...) 
\ + __reiserfs_panic(s, id, __func__, fmt, ##args) ++void __reiserfs_error(struct super_block *s, const char *id, ++ const char *function, const char *fmt, ...); ++#define reiserfs_error(s, id, fmt, args...) \ ++ __reiserfs_error(s, id, __func__, fmt, ##args) + void reiserfs_info(struct super_block *s, const char *fmt, ...); + void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...); + void print_indirect_item(struct buffer_head *bh, int item_num); diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-buffer-info-for-balance.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-buffer-info-for-balance.diff new file mode 100644 index 0000000000..050494788b --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-buffer-info-for-balance.diff @@ -0,0 +1,122 @@ +From: Jeff Mahoney +Subject: reiserfs: use buffer_info for leaf_paste_entries + + This patch makes leaf_paste_entries more consistent with respect to the + other leaf operations. Using buffer_info instead of buffer_head directly + allows us to get a superblock pointer for use in error handling. + +Signed-off-by: Jeff Mahoney + +--- + fs/reiserfs/do_balan.c | 17 +++++++---------- + fs/reiserfs/lbalance.c | 5 +++-- + include/linux/reiserfs_fs.h | 2 +- + 3 files changed, 11 insertions(+), 13 deletions(-) + +--- a/fs/reiserfs/do_balan.c ++++ b/fs/reiserfs/do_balan.c +@@ -449,8 +449,7 @@ static int balance_leaf(struct tree_bala + /* when we have merge directory item, pos_in_item has been changed too */ + + /* paste new directory entry. 1 is entry number */ +- leaf_paste_entries(bi. 
+- bi_bh, ++ leaf_paste_entries(&bi, + n + + item_pos + - +@@ -699,7 +698,7 @@ static int balance_leaf(struct tree_bala + n + item_pos - + ret_val); + if (is_direntry_le_ih(pasted)) +- leaf_paste_entries(bi.bi_bh, ++ leaf_paste_entries(&bi, + n + + item_pos - + ret_val, +@@ -894,8 +893,7 @@ static int balance_leaf(struct tree_bala + tb->insert_size[0], + body, zeros_num); + /* paste entry */ +- leaf_paste_entries(bi. +- bi_bh, ++ leaf_paste_entries(&bi, + 0, + paste_entry_position, + 1, +@@ -1096,7 +1094,7 @@ static int balance_leaf(struct tree_bala + tb->rnum[0]); + if (is_direntry_le_ih(pasted) + && pos_in_item >= 0) { +- leaf_paste_entries(bi.bi_bh, ++ leaf_paste_entries(&bi, + item_pos - + n + + tb->rnum[0], +@@ -1339,8 +1337,7 @@ static int balance_leaf(struct tree_bala + tb->insert_size[0], + body, zeros_num); + /* paste new directory entry */ +- leaf_paste_entries(bi. +- bi_bh, ++ leaf_paste_entries(&bi, + 0, + pos_in_item + - +@@ -1505,7 +1502,7 @@ static int balance_leaf(struct tree_bala + item_pos - n + + snum[i]); + if (is_direntry_le_ih(pasted)) { +- leaf_paste_entries(bi.bi_bh, ++ leaf_paste_entries(&bi, + item_pos - + n + snum[i], + pos_in_item, +@@ -1606,7 +1603,7 @@ static int balance_leaf(struct tree_bala + zeros_num); + + /* paste entry */ +- leaf_paste_entries(bi.bi_bh, ++ leaf_paste_entries(&bi, + item_pos, + pos_in_item, + 1, +--- a/fs/reiserfs/lbalance.c ++++ b/fs/reiserfs/lbalance.c +@@ -111,7 +111,7 @@ static void leaf_copy_dir_entries(struct + item_num_in_dest = + (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0; + +- leaf_paste_entries(dest_bi->bi_bh, item_num_in_dest, ++ leaf_paste_entries(dest_bi, item_num_in_dest, + (last_first == + FIRST_TO_LAST) ? 
I_ENTRY_COUNT(B_N_PITEM_HEAD(dest, + item_num_in_dest)) +@@ -1191,7 +1191,7 @@ static void leaf_delete_items_entirely(s + } + + /* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */ +-void leaf_paste_entries(struct buffer_head *bh, ++void leaf_paste_entries(struct buffer_info *bi, + int item_num, + int before, + int new_entry_count, +@@ -1203,6 +1203,7 @@ void leaf_paste_entries(struct buffer_he + struct reiserfs_de_head *deh; + char *insert_point; + int i, old_entry_num; ++ struct buffer_head *bh = bi->bi_bh; + + if (new_entry_count == 0) + return; +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -2026,7 +2026,7 @@ void leaf_paste_in_buffer(struct buffer_ + int zeros_number); + void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, + int pos_in_item, int cut_size); +-void leaf_paste_entries(struct buffer_head *bh, int item_num, int before, ++void leaf_paste_entries(struct buffer_info *bi, int item_num, int before, + int new_entry_count, struct reiserfs_de_head *new_dehs, + const char *records, int paste_size); + /* ibalance.c */ diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-cleanup-path-funcs.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-cleanup-path-funcs.diff new file mode 100644 index 0000000000..d567695257 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-cleanup-path-funcs.diff @@ -0,0 +1,290 @@ +From: Jeff Mahoney +Subject: reiserfs: cleanup path functions + + This patch cleans up some redundancies in the reiserfs tree path code. + + decrement_bcount() is essentially the same function as brelse(), so we use + that instead. + + decrement_counters_in_path() is exactly the same function as pathrelse(), so + we kill that and use pathrelse() instead. + + There's also a bit of cleanup that makes the code a bit more readable. 
+ +Signed-off-by: Jeff Mahoney + +--- + + fs/reiserfs/fix_node.c | 58 ++++++++++++++++++++++++------------------------ + fs/reiserfs/stree.c | 59 ++++++++++--------------------------------------- + 2 files changed, 43 insertions(+), 74 deletions(-) + +--- a/fs/reiserfs/fix_node.c ++++ b/fs/reiserfs/fix_node.c +@@ -753,20 +753,21 @@ static void free_buffers_in_tb(struct tr + { + int n_counter; + +- decrement_counters_in_path(p_s_tb->tb_path); ++ pathrelse(p_s_tb->tb_path); + + for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) { +- decrement_bcount(p_s_tb->L[n_counter]); ++ brelse(p_s_tb->L[n_counter]); ++ brelse(p_s_tb->R[n_counter]); ++ brelse(p_s_tb->FL[n_counter]); ++ brelse(p_s_tb->FR[n_counter]); ++ brelse(p_s_tb->CFL[n_counter]); ++ brelse(p_s_tb->CFR[n_counter]); ++ + p_s_tb->L[n_counter] = NULL; +- decrement_bcount(p_s_tb->R[n_counter]); + p_s_tb->R[n_counter] = NULL; +- decrement_bcount(p_s_tb->FL[n_counter]); + p_s_tb->FL[n_counter] = NULL; +- decrement_bcount(p_s_tb->FR[n_counter]); + p_s_tb->FR[n_counter] = NULL; +- decrement_bcount(p_s_tb->CFL[n_counter]); + p_s_tb->CFL[n_counter] = NULL; +- decrement_bcount(p_s_tb->CFR[n_counter]); + p_s_tb->CFR[n_counter] = NULL; + } + } +@@ -1022,7 +1023,7 @@ static int get_far_parent(struct tree_ba + if (buffer_locked(*pp_s_com_father)) { + __wait_on_buffer(*pp_s_com_father); + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { +- decrement_bcount(*pp_s_com_father); ++ brelse(*pp_s_com_father); + return REPEAT_SEARCH; + } + } +@@ -1050,8 +1051,8 @@ static int get_far_parent(struct tree_ba + return IO_ERROR; + + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { +- decrement_counters_in_path(&s_path_to_neighbor_father); +- decrement_bcount(*pp_s_com_father); ++ pathrelse(&s_path_to_neighbor_father); ++ brelse(*pp_s_com_father); + return REPEAT_SEARCH; + } + +@@ -1063,7 +1064,7 @@ static int get_far_parent(struct tree_ba + FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small"); + + s_path_to_neighbor_father.path_length--; +- 
decrement_counters_in_path(&s_path_to_neighbor_father); ++ pathrelse(&s_path_to_neighbor_father); + return CARRY_ON; + } + +@@ -1086,10 +1087,10 @@ static int get_parents(struct tree_balan + if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) { + /* The root can not have parents. + Release nodes which previously were obtained as parents of the current node neighbors. */ +- decrement_bcount(p_s_tb->FL[n_h]); +- decrement_bcount(p_s_tb->CFL[n_h]); +- decrement_bcount(p_s_tb->FR[n_h]); +- decrement_bcount(p_s_tb->CFR[n_h]); ++ brelse(p_s_tb->FL[n_h]); ++ brelse(p_s_tb->CFL[n_h]); ++ brelse(p_s_tb->FR[n_h]); ++ brelse(p_s_tb->CFR[n_h]); + p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] = + p_s_tb->CFR[n_h] = NULL; + return CARRY_ON; +@@ -1115,9 +1116,9 @@ static int get_parents(struct tree_balan + return n_ret_value; + } + +- decrement_bcount(p_s_tb->FL[n_h]); ++ brelse(p_s_tb->FL[n_h]); + p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */ +- decrement_bcount(p_s_tb->CFL[n_h]); ++ brelse(p_s_tb->CFL[n_h]); + p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */ + + RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) || +@@ -1145,10 +1146,10 @@ static int get_parents(struct tree_balan + p_s_tb->rkey[n_h] = n_position; + } + +- decrement_bcount(p_s_tb->FR[n_h]); ++ brelse(p_s_tb->FR[n_h]); + p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */ + +- decrement_bcount(p_s_tb->CFR[n_h]); ++ brelse(p_s_tb->CFR[n_h]); + p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. 
*/ + + RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) || +@@ -1964,7 +1965,7 @@ static int get_neighbors(struct tree_bal + if (!p_s_bh) + return IO_ERROR; + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { +- decrement_bcount(p_s_bh); ++ brelse(p_s_bh); + PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]); + return REPEAT_SEARCH; + } +@@ -1980,7 +1981,7 @@ static int get_neighbors(struct tree_bal + dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)), + "PAP-8290: invalid child size of left neighbor"); + +- decrement_bcount(p_s_tb->L[n_h]); ++ brelse(p_s_tb->L[n_h]); + p_s_tb->L[n_h] = p_s_bh; + } + +@@ -2001,11 +2002,11 @@ static int get_neighbors(struct tree_bal + if (!p_s_bh) + return IO_ERROR; + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { +- decrement_bcount(p_s_bh); ++ brelse(p_s_bh); + PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]); + return REPEAT_SEARCH; + } +- decrement_bcount(p_s_tb->R[n_h]); ++ brelse(p_s_tb->R[n_h]); + p_s_tb->R[n_h] = p_s_bh; + + RFALSE(!n_h +@@ -2511,16 +2512,17 @@ int fix_nodes(int n_op_mode, struct tree + } + + brelse(p_s_tb->L[i]); +- p_s_tb->L[i] = NULL; + brelse(p_s_tb->R[i]); +- p_s_tb->R[i] = NULL; + brelse(p_s_tb->FL[i]); +- p_s_tb->FL[i] = NULL; + brelse(p_s_tb->FR[i]); +- p_s_tb->FR[i] = NULL; + brelse(p_s_tb->CFL[i]); +- p_s_tb->CFL[i] = NULL; + brelse(p_s_tb->CFR[i]); ++ ++ p_s_tb->L[i] = NULL; ++ p_s_tb->R[i] = NULL; ++ p_s_tb->FL[i] = NULL; ++ p_s_tb->FR[i] = NULL; ++ p_s_tb->CFL[i] = NULL; + p_s_tb->CFR[i] = NULL; + } + +--- a/fs/reiserfs/stree.c ++++ b/fs/reiserfs/stree.c +@@ -23,7 +23,6 @@ + * get_rkey + * key_in_buffer + * decrement_bcount +- * decrement_counters_in_path + * reiserfs_check_path + * pathrelse_and_restore + * pathrelse +@@ -359,36 +358,6 @@ static inline int key_in_buffer(struct t + return 1; + } + +-inline void decrement_bcount(struct buffer_head *p_s_bh) +-{ +- if (p_s_bh) { +- if (atomic_read(&(p_s_bh->b_count))) { +- put_bh(p_s_bh); +- return; +- } +- reiserfs_panic(NULL, "PAP-5070", +- "trying to free free buffer 
%b", p_s_bh); +- } +-} +- +-/* Decrement b_count field of the all buffers in the path. */ +-void decrement_counters_in_path(struct treepath *p_s_search_path) +-{ +- int n_path_offset = p_s_search_path->path_length; +- +- RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET || +- n_path_offset > EXTENDED_MAX_HEIGHT - 1, +- "PAP-5080: invalid path offset of %d", n_path_offset); +- +- while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { +- struct buffer_head *bh; +- +- bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--); +- decrement_bcount(bh); +- } +- p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; +-} +- + int reiserfs_check_path(struct treepath *p) + { + RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET, +@@ -396,12 +365,11 @@ int reiserfs_check_path(struct treepath + return 0; + } + +-/* Release all buffers in the path. Restore dirty bits clean +-** when preparing the buffer for the log +-** +-** only called from fix_nodes() +-*/ +-void pathrelse_and_restore(struct super_block *s, struct treepath *p_s_search_path) ++/* Drop the reference to each buffer in a path and restore ++ * dirty bits clean when preparing the buffer for the log. ++ * This version should only be called from fix_nodes() */ ++void pathrelse_and_restore(struct super_block *sb, ++ struct treepath *p_s_search_path) + { + int n_path_offset = p_s_search_path->path_length; + +@@ -409,16 +377,15 @@ void pathrelse_and_restore(struct super_ + "clm-4000: invalid path offset"); + + while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { +- reiserfs_restore_prepared_buffer(s, +- PATH_OFFSET_PBUFFER +- (p_s_search_path, +- n_path_offset)); +- brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); ++ struct buffer_head *bh; ++ bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--); ++ reiserfs_restore_prepared_buffer(sb, bh); ++ brelse(bh); + } + p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; + } + +-/* Release all buffers in the path. 
*/ ++/* Drop the reference to each buffer in a path */ + void pathrelse(struct treepath *p_s_search_path) + { + int n_path_offset = p_s_search_path->path_length; +@@ -631,7 +598,7 @@ int search_by_key(struct super_block *p_ + we must be careful to release all nodes in a path before we either + discard the path struct or re-use the path struct, as we do here. */ + +- decrement_counters_in_path(p_s_search_path); ++ pathrelse(p_s_search_path); + + right_neighbor_of_leaf_node = 0; + +@@ -691,7 +658,7 @@ int search_by_key(struct super_block *p_ + PROC_INFO_INC(p_s_sb, search_by_key_restarted); + PROC_INFO_INC(p_s_sb, + sbk_restarted[expected_level - 1]); +- decrement_counters_in_path(p_s_search_path); ++ pathrelse(p_s_search_path); + + /* Get the root block number so that we can repeat the search + starting from the root. */ +@@ -1868,7 +1835,7 @@ int reiserfs_do_truncate(struct reiserfs + if (journal_transaction_should_end(th, 0) || + reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { + int orig_len_alloc = th->t_blocks_allocated; +- decrement_counters_in_path(&s_search_path); ++ pathrelse(&s_search_path); + + if (update_timestamps) { + p_s_inode->i_mtime = p_s_inode->i_ctime = diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-consistent-messages.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-consistent-messages.diff new file mode 100644 index 0000000000..11bcbfee48 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-consistent-messages.diff @@ -0,0 +1,80 @@ +From: Jeff Mahoney +Subject: reiserfs: use more consistent printk formatting + + The output format between a warning/error/panic/info/etc changes with + which one is used. + + The following patch makes the messages more internally consistent, but also + more consistent with other Linux filesystems. 
+ +Signed-off-by: Jeff Mahoney + +-- + fs/reiserfs/prints.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +--- a/fs/reiserfs/prints.c ++++ b/fs/reiserfs/prints.c +@@ -268,10 +268,10 @@ void reiserfs_warning(struct super_block + { + do_reiserfs_warning(fmt); + if (sb) +- printk(KERN_WARNING "ReiserFS: %s: warning: %s\n", +- reiserfs_bdevname(sb), error_buf); ++ printk(KERN_WARNING "REISERFS warning (device %s): %s\n", ++ sb->s_id, error_buf); + else +- printk(KERN_WARNING "ReiserFS: warning: %s\n", error_buf); ++ printk(KERN_WARNING "REISERFS warning: %s\n", error_buf); + } + + /* No newline.. reiserfs_info calls can be followed by printk's */ +@@ -279,10 +279,10 @@ void reiserfs_info(struct super_block *s + { + do_reiserfs_warning(fmt); + if (sb) +- printk(KERN_NOTICE "ReiserFS: %s: %s", +- reiserfs_bdevname(sb), error_buf); ++ printk(KERN_NOTICE "REISERFS (device %s): %s", ++ sb->s_id, error_buf); + else +- printk(KERN_NOTICE "ReiserFS: %s", error_buf); ++ printk(KERN_NOTICE "REISERFS %s:", error_buf); + } + + /* No newline.. 
reiserfs_printk calls can be followed by printk's */ +@@ -297,10 +297,10 @@ void reiserfs_debug(struct super_block * + #ifdef CONFIG_REISERFS_CHECK + do_reiserfs_warning(fmt); + if (s) +- printk(KERN_DEBUG "ReiserFS: %s: %s\n", +- reiserfs_bdevname(s), error_buf); ++ printk(KERN_DEBUG "REISERFS debug (device %s): %s\n", ++ s->s_id, error_buf); + else +- printk(KERN_DEBUG "ReiserFS: %s\n", error_buf); ++ printk(KERN_DEBUG "REISERFS debug: %s\n", error_buf); + #endif + } + +@@ -368,15 +368,15 @@ void reiserfs_abort(struct super_block * + do_reiserfs_warning(fmt); + + if (reiserfs_error_panic(sb)) { +- panic(KERN_CRIT "REISERFS: panic (device %s): %s\n", +- reiserfs_bdevname(sb), error_buf); ++ panic(KERN_CRIT "REISERFS panic (device %s): %s\n", sb->s_id, ++ error_buf); + } + +- if (sb->s_flags & MS_RDONLY) ++ if (reiserfs_is_journal_aborted(SB_JOURNAL(sb))) + return; + +- printk(KERN_CRIT "REISERFS: abort (device %s): %s\n", +- reiserfs_bdevname(sb), error_buf); ++ printk(KERN_CRIT "REISERFS abort (device %s): %s\n", sb->s_id, ++ error_buf); + + sb->s_flags |= MS_RDONLY; + reiserfs_journal_abort(sb, errno); diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-eliminate-per-super-xattr-lock.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-eliminate-per-super-xattr-lock.diff new file mode 100644 index 0000000000..08de42b0da --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-eliminate-per-super-xattr-lock.diff @@ -0,0 +1,573 @@ +From: Jeff Mahoney +Subject: reiserfs: eliminate per-super xattr lock + + With the switch to using inode->i_mutex locking during lookups/creation in + the xattr root, the per-super xattr lock is no longer needed. + + This patch removes it. 
+ +Signed-off-by: Jeff Mahoney +-- + fs/reiserfs/inode.c | 14 ------- + fs/reiserfs/namei.c | 29 ---------------- + fs/reiserfs/super.c | 4 -- + fs/reiserfs/xattr.c | 70 +++++++++++++++++++------------------- + fs/reiserfs/xattr_acl.c | 74 ++++++++++++++++++----------------------- + include/linux/reiserfs_fs.h | 3 - + include/linux/reiserfs_fs_sb.h | 3 - + include/linux/reiserfs_xattr.h | 28 ++------------- + 8 files changed, 74 insertions(+), 151 deletions(-) + +--- a/fs/reiserfs/inode.c ++++ b/fs/reiserfs/inode.c +@@ -1962,19 +1962,7 @@ int reiserfs_new_inode(struct reiserfs_t + out_inserted_sd: + inode->i_nlink = 0; + th->t_trans_id = 0; /* so the caller can't use this handle later */ +- +- /* If we were inheriting an ACL, we need to release the lock so that +- * iput doesn't deadlock in reiserfs_delete_xattrs. The locking +- * code really needs to be reworked, but this will take care of it +- * for now. -jeffm */ +-#ifdef CONFIG_REISERFS_FS_POSIX_ACL +- if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) { +- reiserfs_write_unlock_xattrs(dir->i_sb); +- iput(inode); +- reiserfs_write_lock_xattrs(dir->i_sb); +- } else +-#endif +- iput(inode); ++ iput(inode); + return err; + } + +--- a/fs/reiserfs/namei.c ++++ b/fs/reiserfs/namei.c +@@ -618,9 +618,6 @@ static int reiserfs_create(struct inode + + reiserfs_write_lock(dir->i_sb); + +- if (locked) +- reiserfs_write_lock_xattrs(dir->i_sb); +- + retval = journal_begin(&th, dir->i_sb, jbegin_count); + if (retval) { + drop_new_inode(inode); +@@ -633,11 +630,6 @@ static int reiserfs_create(struct inode + if (retval) + goto out_failed; + +- if (locked) { +- reiserfs_write_unlock_xattrs(dir->i_sb); +- locked = 0; +- } +- + inode->i_op = &reiserfs_file_inode_operations; + inode->i_fop = &reiserfs_file_operations; + inode->i_mapping->a_ops = &reiserfs_address_space_operations; +@@ -662,8 +654,6 @@ static int reiserfs_create(struct inode + retval = journal_end(&th, dir->i_sb, jbegin_count); + + 
out_failed: +- if (locked) +- reiserfs_write_unlock_xattrs(dir->i_sb); + reiserfs_write_unlock(dir->i_sb); + return retval; + } +@@ -693,9 +683,6 @@ static int reiserfs_mknod(struct inode * + + reiserfs_write_lock(dir->i_sb); + +- if (locked) +- reiserfs_write_lock_xattrs(dir->i_sb); +- + retval = journal_begin(&th, dir->i_sb, jbegin_count); + if (retval) { + drop_new_inode(inode); +@@ -709,11 +696,6 @@ static int reiserfs_mknod(struct inode * + goto out_failed; + } + +- if (locked) { +- reiserfs_write_unlock_xattrs(dir->i_sb); +- locked = 0; +- } +- + inode->i_op = &reiserfs_special_inode_operations; + init_special_inode(inode, inode->i_mode, rdev); + +@@ -741,8 +723,6 @@ static int reiserfs_mknod(struct inode * + retval = journal_end(&th, dir->i_sb, jbegin_count); + + out_failed: +- if (locked) +- reiserfs_write_unlock_xattrs(dir->i_sb); + reiserfs_write_unlock(dir->i_sb); + return retval; + } +@@ -772,8 +752,6 @@ static int reiserfs_mkdir(struct inode * + locked = reiserfs_cache_default_acl(dir); + + reiserfs_write_lock(dir->i_sb); +- if (locked) +- reiserfs_write_lock_xattrs(dir->i_sb); + + retval = journal_begin(&th, dir->i_sb, jbegin_count); + if (retval) { +@@ -795,11 +773,6 @@ static int reiserfs_mkdir(struct inode * + goto out_failed; + } + +- if (locked) { +- reiserfs_write_unlock_xattrs(dir->i_sb); +- locked = 0; +- } +- + reiserfs_update_inode_transaction(inode); + reiserfs_update_inode_transaction(dir); + +@@ -827,8 +800,6 @@ static int reiserfs_mkdir(struct inode * + d_instantiate(dentry, inode); + retval = journal_end(&th, dir->i_sb, jbegin_count); + out_failed: +- if (locked) +- reiserfs_write_unlock_xattrs(dir->i_sb); + reiserfs_write_unlock(dir->i_sb); + return retval; + } +--- a/fs/reiserfs/super.c ++++ b/fs/reiserfs/super.c +@@ -1644,10 +1644,6 @@ static int reiserfs_fill_super(struct su + REISERFS_SB(s)->s_alloc_options.preallocmin = 0; + /* Preallocate by 16 blocks (17-1) at once */ + REISERFS_SB(s)->s_alloc_options.preallocsize = 17; +-#ifdef 
CONFIG_REISERFS_FS_XATTR +- /* Initialize the rwsem for xattr dir */ +- init_rwsem(&REISERFS_SB(s)->xattr_dir_sem); +-#endif + /* setup default block allocator options */ + reiserfs_init_alloc_options(s); + +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -27,6 +27,12 @@ + * these are special cases for filesystem ACLs, they are interpreted by the + * kernel, in addition, they are negatively and positively cached and attached + * to the inode so that unnecessary lookups are avoided. ++ * ++ * Locking works like so: ++ * The xattr root (/.reiserfs_priv/xattrs) is protected by its i_mutex. ++ * The xattr dir (/.reiserfs_priv/xattrs/.) is protected by ++ * inode->xattr_sem. ++ * The xattrs themselves are likewise protected by the xattr_sem. + */ + + #include +@@ -392,16 +398,17 @@ reiserfs_delete_xattrs_filler(void *buf, + /* This is called w/ inode->i_mutex downed */ + int reiserfs_delete_xattrs(struct inode *inode) + { +- struct dentry *dir, *root; + int err = 0; ++ struct dentry *dir, *root; ++ struct reiserfs_transaction_handle th; ++ int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + ++ 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); + + /* Skip out, an xattr has no xattrs associated with it */ + if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1) + return 0; + +- reiserfs_read_lock_xattrs(inode->i_sb); + dir = open_xa_dir(inode, XATTR_REPLACE); +- reiserfs_read_unlock_xattrs(inode->i_sb); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + goto out; +@@ -416,18 +423,26 @@ int reiserfs_delete_xattrs(struct inode + if (err) + goto out_dir; + +- /* Leftovers besides . and .. -- that's not good. */ +- if (dir->d_inode->i_nlink <= 2) { +- root = open_xa_root(inode->i_sb, XATTR_REPLACE); +- reiserfs_write_lock_xattrs(inode->i_sb); ++ /* We start a transaction here to avoid a ABBA situation ++ * between the xattr root's i_mutex and the journal lock. ++ * Inode creation will inherit an ACL, which requires a ++ * lookup. 
The lookup locks the xattr root i_mutex with a ++ * transaction open. Inode deletion takes teh xattr root ++ * i_mutex to delete the directory and then starts a ++ * transaction inside it. Boom. This doesn't incur much ++ * additional overhead since the reiserfs_rmdir transaction ++ * will just nest inside the outer transaction. */ ++ err = journal_begin(&th, inode->i_sb, blocks); ++ if (!err) { ++ int jerror; ++ root = dget(dir->d_parent); + mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_XATTR); + err = xattr_rmdir(root->d_inode, dir); ++ jerror = journal_end(&th, inode->i_sb, blocks); + mutex_unlock(&root->d_inode->i_mutex); +- reiserfs_write_unlock_xattrs(inode->i_sb); + dput(root); +- } else { +- reiserfs_warning(inode->i_sb, "jdm-20006", +- "Couldn't remove all entries in directory"); ++ ++ err = jerror ?: err; + } + + out_dir: +@@ -437,6 +452,9 @@ out: + if (!err) + REISERFS_I(inode)->i_flags = + REISERFS_I(inode)->i_flags & ~i_has_xattr_dir; ++ else ++ reiserfs_warning(inode->i_sb, "jdm-20004", ++ "Couldn't remove all xattrs (%d)\n", err); + return err; + } + +@@ -485,9 +503,7 @@ int reiserfs_chown_xattrs(struct inode * + if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1) + return 0; + +- reiserfs_read_lock_xattrs(inode->i_sb); + dir = open_xa_dir(inode, XATTR_REPLACE); +- reiserfs_read_unlock_xattrs(inode->i_sb); + if (IS_ERR(dir)) { + if (PTR_ERR(dir) != -ENODATA) + err = PTR_ERR(dir); +@@ -731,6 +747,11 @@ reiserfs_xattr_get(const struct inode *i + goto out; + } + ++ /* protect against concurrent access. xattrs are backed by ++ * regular files, but they're not regular files. The updates ++ * must be atomic from the perspective of the user. 
*/ ++ mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); ++ + isize = i_size_read(dentry->d_inode); + REISERFS_I(inode)->i_flags |= i_has_xattr_dir; + +@@ -798,6 +819,7 @@ reiserfs_xattr_get(const struct inode *i + } + + out_dput: ++ mutex_unlock(&dentry->d_inode->i_mutex); + dput(dentry); + + out: +@@ -834,7 +856,6 @@ int reiserfs_xattr_del(struct inode *ino + static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char *); + /* + * Inode operation getxattr() +- * Preliminary locking: we down dentry->d_inode->i_mutex + */ + ssize_t + reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, +@@ -848,9 +869,7 @@ reiserfs_getxattr(struct dentry * dentry + return -EOPNOTSUPP; + + reiserfs_read_lock_xattr_i(dentry->d_inode); +- reiserfs_read_lock_xattrs(dentry->d_sb); + err = xah->get(dentry->d_inode, name, buffer, size); +- reiserfs_read_unlock_xattrs(dentry->d_sb); + reiserfs_read_unlock_xattr_i(dentry->d_inode); + return err; + } +@@ -866,23 +885,13 @@ reiserfs_setxattr(struct dentry *dentry, + { + struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name); + int err; +- int lock; + + if (!xah || !reiserfs_xattrs(dentry->d_sb) || + get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + + reiserfs_write_lock_xattr_i(dentry->d_inode); +- lock = !has_xattr_dir(dentry->d_inode); +- if (lock) +- reiserfs_write_lock_xattrs(dentry->d_sb); +- else +- reiserfs_read_lock_xattrs(dentry->d_sb); + err = xah->set(dentry->d_inode, name, value, size, flags); +- if (lock) +- reiserfs_write_unlock_xattrs(dentry->d_sb); +- else +- reiserfs_read_unlock_xattrs(dentry->d_sb); + reiserfs_write_unlock_xattr_i(dentry->d_inode); + return err; + } +@@ -902,8 +911,6 @@ int reiserfs_removexattr(struct dentry * + return -EOPNOTSUPP; + + reiserfs_write_lock_xattr_i(dentry->d_inode); +- reiserfs_read_lock_xattrs(dentry->d_sb); +- + /* Deletion pre-operation */ + if (xah->del) { + err = xah->del(dentry->d_inode, name); +@@ 
-917,7 +924,6 @@ int reiserfs_removexattr(struct dentry * + mark_inode_dirty(dentry->d_inode); + + out: +- reiserfs_read_unlock_xattrs(dentry->d_sb); + reiserfs_write_unlock_xattr_i(dentry->d_inode); + return err; + } +@@ -966,8 +972,6 @@ reiserfs_listxattr_filler(void *buf, con + + /* + * Inode operation listxattr() +- * +- * Preliminary locking: we down dentry->d_inode->i_mutex + */ + ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) + { +@@ -983,9 +987,7 @@ ssize_t reiserfs_listxattr(struct dentry + return -EOPNOTSUPP; + + reiserfs_read_lock_xattr_i(dentry->d_inode); +- reiserfs_read_lock_xattrs(dentry->d_sb); + dir = open_xa_dir(dentry->d_inode, XATTR_REPLACE); +- reiserfs_read_unlock_xattrs(dentry->d_sb); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + if (err == -ENODATA) +@@ -1114,11 +1116,9 @@ static int reiserfs_check_acl(struct ino + int error = -EAGAIN; /* do regular unix permission checks by default */ + + reiserfs_read_lock_xattr_i(inode); +- reiserfs_read_lock_xattrs(inode->i_sb); + + acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); + +- reiserfs_read_unlock_xattrs(inode->i_sb); + reiserfs_read_unlock_xattr_i(inode); + + if (acl) { +--- a/fs/reiserfs/xattr_acl.c ++++ b/fs/reiserfs/xattr_acl.c +@@ -172,6 +172,29 @@ static void *posix_acl_to_disk(const str + return ERR_PTR(-EINVAL); + } + ++static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl, ++ struct posix_acl *acl) ++{ ++ spin_lock(&inode->i_lock); ++ if (*i_acl != ERR_PTR(-ENODATA)) ++ posix_acl_release(*i_acl); ++ *i_acl = posix_acl_dup(acl); ++ spin_unlock(&inode->i_lock); ++} ++ ++static inline struct posix_acl *iget_acl(struct inode *inode, ++ struct posix_acl **i_acl) ++{ ++ struct posix_acl *acl = ERR_PTR(-ENODATA); ++ ++ spin_lock(&inode->i_lock); ++ if (*i_acl != ERR_PTR(-ENODATA)) ++ acl = posix_acl_dup(*i_acl); ++ spin_unlock(&inode->i_lock); ++ ++ return acl; ++} ++ + /* + * Inode operation get_posix_acl(). 
+ * +@@ -199,11 +222,11 @@ struct posix_acl *reiserfs_get_acl(struc + return ERR_PTR(-EINVAL); + } + +- if (IS_ERR(*p_acl)) { +- if (PTR_ERR(*p_acl) == -ENODATA) +- return NULL; +- } else if (*p_acl != NULL) +- return posix_acl_dup(*p_acl); ++ acl = iget_acl(inode, p_acl); ++ if (acl && !IS_ERR(acl)) ++ return acl; ++ else if (PTR_ERR(acl) == -ENODATA) ++ return NULL; + + size = reiserfs_xattr_get(inode, name, NULL, 0); + if (size < 0) { +@@ -229,7 +252,7 @@ struct posix_acl *reiserfs_get_acl(struc + } else { + acl = posix_acl_from_disk(value, retval); + if (!IS_ERR(acl)) +- *p_acl = posix_acl_dup(acl); ++ iset_acl(inode, p_acl, acl); + } + + kfree(value); +@@ -300,16 +323,8 @@ reiserfs_set_acl(struct inode *inode, in + + kfree(value); + +- if (!error) { +- /* Release the old one */ +- if (!IS_ERR(*p_acl) && *p_acl) +- posix_acl_release(*p_acl); +- +- if (acl == NULL) +- *p_acl = ERR_PTR(-ENODATA); +- else +- *p_acl = posix_acl_dup(acl); +- } ++ if (!error) ++ iset_acl(inode, p_acl, acl); + + return error; + } +@@ -404,9 +419,7 @@ int reiserfs_cache_default_acl(struct in + if (reiserfs_posixacl(inode->i_sb) && !IS_PRIVATE(inode)) { + struct posix_acl *acl; + reiserfs_read_lock_xattr_i(inode); +- reiserfs_read_lock_xattrs(inode->i_sb); + acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); +- reiserfs_read_unlock_xattrs(inode->i_sb); + reiserfs_read_unlock_xattr_i(inode); + ret = (acl && !IS_ERR(acl)); + if (ret) +@@ -429,9 +442,7 @@ int reiserfs_acl_chmod(struct inode *ino + return 0; + } + +- reiserfs_read_lock_xattrs(inode->i_sb); + acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); +- reiserfs_read_unlock_xattrs(inode->i_sb); + if (!acl) + return 0; + if (IS_ERR(acl)) +@@ -442,17 +453,8 @@ int reiserfs_acl_chmod(struct inode *ino + return -ENOMEM; + error = posix_acl_chmod_masq(clone, inode->i_mode); + if (!error) { +- int lock = !has_xattr_dir(inode); + reiserfs_write_lock_xattr_i(inode); +- if (lock) +- reiserfs_write_lock_xattrs(inode->i_sb); +- else +- 
reiserfs_read_lock_xattrs(inode->i_sb); + error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone); +- if (lock) +- reiserfs_write_unlock_xattrs(inode->i_sb); +- else +- reiserfs_read_unlock_xattrs(inode->i_sb); + reiserfs_write_unlock_xattr_i(inode); + } + posix_acl_release(clone); +@@ -480,14 +482,9 @@ posix_acl_access_set(struct inode *inode + static int posix_acl_access_del(struct inode *inode, const char *name) + { + struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); +- struct posix_acl **acl = &reiserfs_i->i_acl_access; + if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1) + return -EINVAL; +- if (!IS_ERR(*acl) && *acl) { +- posix_acl_release(*acl); +- *acl = ERR_PTR(-ENODATA); +- } +- ++ iset_acl(inode, &reiserfs_i->i_acl_access, ERR_PTR(-ENODATA)); + return 0; + } + +@@ -533,14 +530,9 @@ posix_acl_default_set(struct inode *inod + static int posix_acl_default_del(struct inode *inode, const char *name) + { + struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); +- struct posix_acl **acl = &reiserfs_i->i_acl_default; + if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) + return -EINVAL; +- if (!IS_ERR(*acl) && *acl) { +- posix_acl_release(*acl); +- *acl = ERR_PTR(-ENODATA); +- } +- ++ iset_acl(inode, &reiserfs_i->i_acl_default, ERR_PTR(-ENODATA)); + return 0; + } + +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -2224,7 +2224,4 @@ int reiserfs_unpack(struct inode *inode, + #define reiserfs_write_lock( sb ) lock_kernel() + #define reiserfs_write_unlock( sb ) unlock_kernel() + +-/* xattr stuff */ +-#define REISERFS_XATTR_DIR_SEM(s) (REISERFS_SB(s)->xattr_dir_sem) +- + #endif /* _LINUX_REISER_FS_H */ +--- a/include/linux/reiserfs_fs_sb.h ++++ b/include/linux/reiserfs_fs_sb.h +@@ -402,9 +402,6 @@ struct reiserfs_sb_info { + spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */ + struct dentry *priv_root; /* root of /.reiserfs_priv */ + struct dentry *xattr_root; /* root of 
/.reiserfs_priv/.xa */ +-#ifdef CONFIG_REISERFS_FS_XATTR +- struct rw_semaphore xattr_dir_sem; +-#endif + int j_errno; + #ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; +--- a/include/linux/reiserfs_xattr.h ++++ b/include/linux/reiserfs_xattr.h +@@ -67,45 +67,27 @@ extern struct reiserfs_xattr_handler use + extern struct reiserfs_xattr_handler trusted_handler; + extern struct reiserfs_xattr_handler security_handler; + +-static inline void reiserfs_write_lock_xattrs(struct super_block *sb) +-{ +- down_write(&REISERFS_XATTR_DIR_SEM(sb)); +-} +-static inline void reiserfs_write_unlock_xattrs(struct super_block *sb) +-{ +- up_write(&REISERFS_XATTR_DIR_SEM(sb)); +-} +-static inline void reiserfs_read_lock_xattrs(struct super_block *sb) +-{ +- down_read(&REISERFS_XATTR_DIR_SEM(sb)); +-} +- +-static inline void reiserfs_read_unlock_xattrs(struct super_block *sb) +-{ +- up_read(&REISERFS_XATTR_DIR_SEM(sb)); +-} +- + static inline void reiserfs_write_lock_xattr_i(struct inode *inode) + { +- down_write(&REISERFS_I(inode)->xattr_sem); ++ down_write(&REISERFS_I(inode)->i_xattr_sem); + } + static inline void reiserfs_write_unlock_xattr_i(struct inode *inode) + { +- up_write(&REISERFS_I(inode)->xattr_sem); ++ up_write(&REISERFS_I(inode)->i_xattr_sem); + } + static inline void reiserfs_read_lock_xattr_i(struct inode *inode) + { +- down_read(&REISERFS_I(inode)->xattr_sem); ++ down_read(&REISERFS_I(inode)->i_xattr_sem); + } + + static inline void reiserfs_read_unlock_xattr_i(struct inode *inode) + { +- up_read(&REISERFS_I(inode)->xattr_sem); ++ up_read(&REISERFS_I(inode)->i_xattr_sem); + } + + static inline void reiserfs_init_xattr_rwsem(struct inode *inode) + { +- init_rwsem(&REISERFS_I(inode)->xattr_sem); ++ init_rwsem(&REISERFS_I(inode)->i_xattr_sem); + } + + #else diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-journaled-xattrs.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-journaled-xattrs.diff new file mode 100644 index 0000000000..de58d89dac --- 
/dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-journaled-xattrs.diff @@ -0,0 +1,422 @@ +From: Jeff Mahoney +Subject: reiserfs: journaled xattrs + + Deadlocks are possible in the xattr code between the journal lock and the + xattr sems. + + This patch implements journalling for xattr operations. The benefit is + twofold: + * It gets rid of the deadlock possibility by always ensuring that xattr + write operations are initiated inside a transaction. + * It corrects the problem where xattr backing files aren't considered any + differently than normal files, despite the fact they are metadata. + + I discussed the added journal load with Chris Mason, and we decided that + since xattrs (versus other journal activity) is fairly rare, the introduction + of larger transactions to support journaled xattrs wouldn't be too big a deal. + +Signed-off-by: Jeff Mahoney + +-- + fs/reiserfs/inode.c | 3 - + fs/reiserfs/namei.c | 14 +---- + fs/reiserfs/xattr.c | 39 +++++++++++---- + fs/reiserfs/xattr_acl.c | 105 +++++++++++++++++++++++++++++++---------- + include/linux/reiserfs_acl.h | 3 - + include/linux/reiserfs_fs.h | 4 + + include/linux/reiserfs_xattr.h | 40 ++++++++++++++- + 7 files changed, 159 insertions(+), 49 deletions(-) + +--- a/fs/reiserfs/inode.c ++++ b/fs/reiserfs/inode.c +@@ -1919,9 +1919,8 @@ int reiserfs_new_inode(struct reiserfs_t + goto out_inserted_sd; + } + +- /* XXX CHECK THIS */ + if (reiserfs_posixacl(inode->i_sb)) { +- retval = reiserfs_inherit_default_acl(dir, dentry, inode); ++ retval = reiserfs_inherit_default_acl(th, dir, dentry, inode); + if (retval) { + err = retval; + reiserfs_check_path(&path_to_key); +--- a/fs/reiserfs/namei.c ++++ b/fs/reiserfs/namei.c +@@ -607,15 +607,13 @@ static int reiserfs_create(struct inode + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + + REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); + struct reiserfs_transaction_handle th; +- int locked; + + if (!(inode = new_inode(dir->i_sb))) { + return -ENOMEM; + } + 
new_inode_init(inode, dir, mode); + +- locked = reiserfs_cache_default_acl(dir); +- ++ jbegin_count += reiserfs_cache_default_acl(dir); + reiserfs_write_lock(dir->i_sb); + + retval = journal_begin(&th, dir->i_sb, jbegin_count); +@@ -669,7 +667,6 @@ static int reiserfs_mknod(struct inode * + JOURNAL_PER_BALANCE_CNT * 3 + + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + + REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); +- int locked; + + if (!new_valid_dev(rdev)) + return -EINVAL; +@@ -679,8 +676,7 @@ static int reiserfs_mknod(struct inode * + } + new_inode_init(inode, dir, mode); + +- locked = reiserfs_cache_default_acl(dir); +- ++ jbegin_count += reiserfs_cache_default_acl(dir); + reiserfs_write_lock(dir->i_sb); + + retval = journal_begin(&th, dir->i_sb, jbegin_count); +@@ -737,7 +733,6 @@ static int reiserfs_mkdir(struct inode * + JOURNAL_PER_BALANCE_CNT * 3 + + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + + REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); +- int locked; + + #ifdef DISPLACE_NEW_PACKING_LOCALITIES + /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ +@@ -749,8 +744,7 @@ static int reiserfs_mkdir(struct inode * + } + new_inode_init(inode, dir, mode); + +- locked = reiserfs_cache_default_acl(dir); +- ++ jbegin_count += reiserfs_cache_default_acl(dir); + reiserfs_write_lock(dir->i_sb); + + retval = journal_begin(&th, dir->i_sb, jbegin_count); +@@ -1037,8 +1031,6 @@ static int reiserfs_symlink(struct inode + memcpy(name, symname, strlen(symname)); + padd_item(name, item_len, strlen(symname)); + +- /* We would inherit the default ACL here, but symlinks don't get ACLs */ +- + retval = journal_begin(&th, parent_dir->i_sb, jbegin_count); + if (retval) { + drop_new_inode(inode); +--- a/fs/reiserfs/xattr_acl.c ++++ b/fs/reiserfs/xattr_acl.c +@@ -10,15 +10,17 @@ + #include + #include + +-static int reiserfs_set_acl(struct inode *inode, int type, ++static int reiserfs_set_acl(struct 
reiserfs_transaction_handle *th, ++ struct inode *inode, int type, + struct posix_acl *acl); + + static int + xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) + { + struct posix_acl *acl; +- int error; +- ++ int error, error2; ++ struct reiserfs_transaction_handle th; ++ size_t jcreate_blocks; + if (!reiserfs_posixacl(inode->i_sb)) + return -EOPNOTSUPP; + if (!is_owner_or_cap(inode)) +@@ -36,7 +38,21 @@ xattr_set_acl(struct inode *inode, int t + } else + acl = NULL; + +- error = reiserfs_set_acl(inode, type, acl); ++ /* Pessimism: We can't assume that anything from the xattr root up ++ * has been created. */ ++ ++ jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) + ++ reiserfs_xattr_nblocks(inode, size) * 2; ++ ++ reiserfs_write_lock(inode->i_sb); ++ error = journal_begin(&th, inode->i_sb, jcreate_blocks); ++ if (error == 0) { ++ error = reiserfs_set_acl(&th, inode, type, acl); ++ error2 = journal_end(&th, inode->i_sb, jcreate_blocks); ++ if (error2) ++ error = error2; ++ } ++ reiserfs_write_unlock(inode->i_sb); + + release_and_out: + posix_acl_release(acl); +@@ -266,7 +282,8 @@ struct posix_acl *reiserfs_get_acl(struc + * BKL held [before 2.5.x] + */ + static int +-reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) ++reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, ++ int type, struct posix_acl *acl) + { + char *name; + void *value = NULL; +@@ -310,7 +327,7 @@ reiserfs_set_acl(struct inode *inode, in + return (int)PTR_ERR(value); + } + +- error = __reiserfs_xattr_set(inode, name, value, size, 0); ++ error = reiserfs_xattr_set_handle(th, inode, name, value, size, 0); + + /* + * Ensure that the inode gets dirtied if we're only using +@@ -337,7 +354,8 @@ reiserfs_set_acl(struct inode *inode, in + /* dir->i_mutex: locked, + * inode is new and not released into the wild yet */ + int +-reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry, 
++reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, ++ struct inode *dir, struct dentry *dentry, + struct inode *inode) + { + struct posix_acl *acl; +@@ -374,7 +392,8 @@ reiserfs_inherit_default_acl(struct inod + + /* Copy the default ACL to the default ACL of a new directory */ + if (S_ISDIR(inode->i_mode)) { +- err = reiserfs_set_acl(inode, ACL_TYPE_DEFAULT, acl); ++ err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT, ++ acl); + if (err) + goto cleanup; + } +@@ -395,9 +414,9 @@ reiserfs_inherit_default_acl(struct inod + + /* If we need an ACL.. */ + if (need_acl > 0) { +- err = +- reiserfs_set_acl(inode, ACL_TYPE_ACCESS, +- acl_copy); ++ err = reiserfs_set_acl(th, inode, ++ ACL_TYPE_ACCESS, ++ acl_copy); + if (err) + goto cleanup_copy; + } +@@ -415,21 +434,45 @@ reiserfs_inherit_default_acl(struct inod + return err; + } + +-/* Looks up and caches the result of the default ACL. +- * We do this so that we don't need to carry the xattr_sem into +- * reiserfs_new_inode if we don't need to */ ++/* This is used to cache the default acl before a new object is created. ++ * The biggest reason for this is to get an idea of how many blocks will ++ * actually be required for the create operation if we must inherit an ACL. ++ * An ACL write can add up to 3 object creations and an additional file write ++ * so we'd prefer not to reserve that many blocks in the journal if we can. ++ * It also has the advantage of not loading the ACL with a transaction open, ++ * this may seem silly, but if the owner of the directory is doing the ++ * creation, the ACL may not be loaded since the permissions wouldn't require ++ * it. ++ * We return the number of blocks required for the transaction. 
++ */ + int reiserfs_cache_default_acl(struct inode *inode) + { +- int ret = 0; +- if (reiserfs_posixacl(inode->i_sb) && !IS_PRIVATE(inode)) { +- struct posix_acl *acl; +- acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); +- ret = (acl && !IS_ERR(acl)); +- if (ret) +- posix_acl_release(acl); ++ struct posix_acl *acl; ++ int nblocks = 0; ++ ++ if (IS_PRIVATE(inode)) ++ return 0; ++ ++ acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); ++ ++ if (acl && !IS_ERR(acl)) { ++ int size = reiserfs_acl_size(acl->a_count); ++ ++ /* Other xattrs can be created during inode creation. We don't ++ * want to claim too many blocks, so we check to see if we ++ * we need to create the tree to the xattrs, and then we ++ * just want two files. */ ++ nblocks = reiserfs_xattr_jcreate_nblocks(inode); ++ nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); ++ ++ REISERFS_I(inode)->i_flags |= i_has_xattr_dir; ++ ++ /* We need to account for writes + bitmaps for two files */ ++ nblocks += reiserfs_xattr_nblocks(inode, size) * 4; ++ posix_acl_release(acl); + } + +- return ret; ++ return nblocks; + } + + int reiserfs_acl_chmod(struct inode *inode) +@@ -455,8 +498,22 @@ int reiserfs_acl_chmod(struct inode *ino + if (!clone) + return -ENOMEM; + error = posix_acl_chmod_masq(clone, inode->i_mode); +- if (!error) +- error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone); ++ if (!error) { ++ struct reiserfs_transaction_handle th; ++ size_t size = reiserfs_xattr_nblocks(inode, ++ reiserfs_acl_size(clone->a_count)); ++ reiserfs_write_lock(inode->i_sb); ++ error = journal_begin(&th, inode->i_sb, size * 2); ++ if (!error) { ++ int error2; ++ error = reiserfs_set_acl(&th, inode, ACL_TYPE_ACCESS, ++ clone); ++ error2 = journal_end(&th, inode->i_sb, size * 2); ++ if (error2) ++ error = error2; ++ } ++ reiserfs_write_unlock(inode->i_sb); ++ } + posix_acl_release(clone); + return error; + } +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -632,8 +632,9 @@ out_dput: + * inode->i_mutex: down + */ + int 
+-__reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, +- size_t buffer_size, int flags) ++reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, ++ struct inode *inode, const char *name, ++ const void *buffer, size_t buffer_size, int flags) + { + int err = 0; + struct dentry *dentry; +@@ -723,14 +724,34 @@ out_unlock: + return err; + } + +-int +-reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, +- size_t buffer_size, int flags) ++/* We need to start a transaction to maintain lock ordering */ ++int reiserfs_xattr_set(struct inode *inode, const char *name, ++ const void *buffer, size_t buffer_size, int flags) + { +- int err = __reiserfs_xattr_set(inode, name, buffer, buffer_size, flags); +- if (err == -ENODATA) +- err = 0; +- return err; ++ ++ struct reiserfs_transaction_handle th; ++ int error, error2; ++ size_t jbegin_count = reiserfs_xattr_nblocks(inode, buffer_size); ++ ++ if (!(flags & XATTR_REPLACE)) ++ jbegin_count += reiserfs_xattr_jcreate_nblocks(inode); ++ ++ reiserfs_write_lock(inode->i_sb); ++ error = journal_begin(&th, inode->i_sb, jbegin_count); ++ if (error) { ++ reiserfs_write_unlock(inode->i_sb); ++ return error; ++ } ++ ++ error = reiserfs_xattr_set_handle(&th, inode, name, ++ buffer, buffer_size, flags); ++ ++ error2 = journal_end(&th, inode->i_sb, jbegin_count); ++ if (error == 0) ++ error = error2; ++ reiserfs_write_unlock(inode->i_sb); ++ ++ return error; + } + + /* +--- a/include/linux/reiserfs_acl.h ++++ b/include/linux/reiserfs_acl.h +@@ -49,7 +49,8 @@ static inline int reiserfs_acl_count(siz + #ifdef CONFIG_REISERFS_FS_POSIX_ACL + struct posix_acl *reiserfs_get_acl(struct inode *inode, int type); + int reiserfs_acl_chmod(struct inode *inode); +-int reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry, ++int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, ++ struct inode *dir, struct dentry *dentry, + struct inode *inode); + int 
reiserfs_cache_default_acl(struct inode *dir); + extern struct xattr_handler reiserfs_posix_acl_default_handler; +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -1615,6 +1615,10 @@ struct reiserfs_journal_header { + #define JOURNAL_MAX_COMMIT_AGE 30 + #define JOURNAL_MAX_TRANS_AGE 30 + #define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9) ++#define JOURNAL_BLOCKS_PER_OBJECT(sb) (JOURNAL_PER_BALANCE_CNT * 3 + \ ++ 2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \ ++ REISERFS_QUOTA_TRANS_BLOCKS(sb))) ++ + #ifdef CONFIG_QUOTA + /* We need to update data and inode (atime) */ + #define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & (1<i_sb->s_blocksize); ++ ret >>= inode->i_sb->s_blocksize_bits; ++ } ++ return ret; ++} ++ ++/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file. ++ * Let's try to be smart about it. ++ * xattr root: We cache it. If it's not cached, we may need to create it. ++ * xattr dir: If anything has been loaded for this inode, we can set a flag ++ * saying so. ++ * xattr file: Since we don't cache xattrs, we can't tell. We always include ++ * blocks for it. ++ * ++ * However, since root and dir can be created between calls - YOU MUST SAVE ++ * THIS VALUE. 
++ */ ++static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode) ++{ ++ size_t nblocks = JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); ++ ++ if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) { ++ nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); ++ if (REISERFS_SB(inode->i_sb)->xattr_root == NULL) ++ nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); ++ } ++ ++ return nblocks; ++} ++ + static inline void reiserfs_init_xattr_rwsem(struct inode *inode) + { + init_rwsem(&REISERFS_I(inode)->i_xattr_sem); diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-kill-xattr-readdir.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-kill-xattr-readdir.diff new file mode 100644 index 0000000000..421a3dcc98 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-kill-xattr-readdir.diff @@ -0,0 +1,582 @@ +From: Jeff Mahoney +Subject: reiserfs: use generic readdir for operations across all xattrs + + The current reiserfs xattr implementation open codes reiserfs_readdir and + frees the path before calling the filldir function. Typically, the filldir + function is something that modifies the file system, such as a chown or + an inode deletion that also require reading of an inode associated with each + direntry. Since the file system is modified, the path retained becomes + invalid for the next run. In addition, it runs backwards in attempt to + minimize activity. + + This is clearly suboptimal from a code cleanliness perspective as well as + performance-wise. + + This patch implements a generic reiserfs_for_each_xattr that uses the generic + readdir and a specific filldir routine that simply populates an array of + dentries and then performs a specific operation on them. When all files have + been operated on, it then calls the operation on the directory itself. + + The result is a noticable code reduction and better performance. 
+ +Signed-off-by: Jeff Mahoney + +-- + fs/reiserfs/dir.c | 28 +-- + fs/reiserfs/xattr.c | 402 ++++++++++++-------------------------------- + include/linux/reiserfs_fs.h | 1 + 3 files changed, 131 insertions(+), 300 deletions(-) + +--- a/fs/reiserfs/dir.c ++++ b/fs/reiserfs/dir.c +@@ -41,10 +41,10 @@ static int reiserfs_dir_fsync(struct fil + + #define store_ih(where,what) copy_item_head (where, what) + +-// +-static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir) ++int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, ++ filldir_t filldir, loff_t *pos) + { +- struct inode *inode = filp->f_path.dentry->d_inode; ++ struct inode *inode = dentry->d_inode; + struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ + INITIALIZE_PATH(path_to_entry); + struct buffer_head *bh; +@@ -64,13 +64,9 @@ static int reiserfs_readdir(struct file + + /* form key for search the next directory entry using f_pos field of + file structure */ +- make_cpu_key(&pos_key, inode, +- (filp->f_pos) ? 
(filp->f_pos) : DOT_OFFSET, TYPE_DIRENTRY, +- 3); ++ make_cpu_key(&pos_key, inode, *pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3); + next_pos = cpu_key_k_offset(&pos_key); + +- /* reiserfs_warning (inode->i_sb, "reiserfs_readdir 1: f_pos = %Ld", filp->f_pos); */ +- + path_to_entry.reada = PATH_READA; + while (1) { + research: +@@ -144,7 +140,7 @@ static int reiserfs_readdir(struct file + /* Ignore the .reiserfs_priv entry */ + if (reiserfs_xattrs(inode->i_sb) && + !old_format_only(inode->i_sb) && +- filp->f_path.dentry == inode->i_sb->s_root && ++ dentry == inode->i_sb->s_root && + REISERFS_SB(inode->i_sb)->priv_root && + REISERFS_SB(inode->i_sb)->priv_root->d_inode + && deh_objectid(deh) == +@@ -156,7 +152,7 @@ static int reiserfs_readdir(struct file + } + + d_off = deh_offset(deh); +- filp->f_pos = d_off; ++ *pos = d_off; + d_ino = deh_objectid(deh); + if (d_reclen <= 32) { + local_buf = small_buf; +@@ -223,15 +219,21 @@ static int reiserfs_readdir(struct file + + } /* while */ + +- end: +- filp->f_pos = next_pos; ++end: ++ *pos = next_pos; + pathrelse(&path_to_entry); + reiserfs_check_path(&path_to_entry); +- out: ++out: + reiserfs_write_unlock(inode->i_sb); + return ret; + } + ++static int reiserfs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = file->f_path.dentry; ++ return reiserfs_readdir_dentry(dentry, dirent, filldir, &file->f_pos); ++} ++ + /* compose directory item containing "." and ".." entries (entries are + not aligned to 4 byte boundary) */ + /* the last four params are LE */ +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -167,218 +167,65 @@ static struct dentry *open_xa_dir(const + + } + +-/* +- * this is very similar to fs/reiserfs/dir.c:reiserfs_readdir, but +- * we need to drop the path before calling the filldir struct. That +- * would be a big performance hit to the non-xattr case, so I've copied +- * the whole thing for now. 
--clm +- * +- * the big difference is that I go backwards through the directory, +- * and don't mess with f->f_pos, but the idea is the same. Do some +- * action on each and every entry in the directory. +- * +- * we're called with i_mutex held, so there are no worries about the directory +- * changing underneath us. +- */ +-static int __xattr_readdir(struct inode *inode, void *dirent, filldir_t filldir) +-{ +- struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ +- INITIALIZE_PATH(path_to_entry); +- struct buffer_head *bh; +- int entry_num; +- struct item_head *ih, tmp_ih; +- int search_res; +- char *local_buf; +- loff_t next_pos; +- char small_buf[32]; /* avoid kmalloc if we can */ +- struct reiserfs_de_head *deh; +- int d_reclen; +- char *d_name; +- off_t d_off; +- ino_t d_ino; +- struct reiserfs_dir_entry de; +- +- /* form key for search the next directory entry using f_pos field of +- file structure */ +- next_pos = max_reiserfs_offset(inode); +- +- while (1) { +- research: +- if (next_pos <= DOT_DOT_OFFSET) +- break; +- make_cpu_key(&pos_key, inode, next_pos, TYPE_DIRENTRY, 3); +- +- search_res = +- search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry, +- &de); +- if (search_res == IO_ERROR) { +- // FIXME: we could just skip part of directory which could +- // not be read +- pathrelse(&path_to_entry); +- return -EIO; +- } +- +- if (search_res == NAME_NOT_FOUND) +- de.de_entry_num--; +- +- set_de_name_and_namelen(&de); +- entry_num = de.de_entry_num; +- deh = &(de.de_deh[entry_num]); +- +- bh = de.de_bh; +- ih = de.de_ih; +- +- if (!is_direntry_le_ih(ih)) { +- reiserfs_error(inode->i_sb, "jdm-20000", +- "not direntry %h", ih); +- break; +- } +- copy_item_head(&tmp_ih, ih); +- +- /* we must have found item, that is item of this directory, */ +- RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key), +- "vs-9000: found item %h does not match to dir we readdir %K", +- ih, &pos_key); +- +- if (deh_offset(deh) <= 
DOT_DOT_OFFSET) { +- break; +- } +- +- /* look for the previous entry in the directory */ +- next_pos = deh_offset(deh) - 1; +- +- if (!de_visible(deh)) +- /* it is hidden entry */ +- continue; +- +- d_reclen = entry_length(bh, ih, entry_num); +- d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh); +- d_off = deh_offset(deh); +- d_ino = deh_objectid(deh); +- +- if (!d_name[d_reclen - 1]) +- d_reclen = strlen(d_name); +- +- if (d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)) { +- /* too big to send back to VFS */ +- continue; +- } +- +- /* Ignore the .reiserfs_priv entry */ +- if (reiserfs_xattrs(inode->i_sb) && +- !old_format_only(inode->i_sb) && +- deh_objectid(deh) == +- le32_to_cpu(INODE_PKEY +- (REISERFS_SB(inode->i_sb)->priv_root->d_inode)-> +- k_objectid)) +- continue; +- +- if (d_reclen <= 32) { +- local_buf = small_buf; +- } else { +- local_buf = kmalloc(d_reclen, GFP_NOFS); +- if (!local_buf) { +- pathrelse(&path_to_entry); +- return -ENOMEM; +- } +- if (item_moved(&tmp_ih, &path_to_entry)) { +- kfree(local_buf); +- +- /* sigh, must retry. Do this same offset again */ +- next_pos = d_off; +- goto research; +- } +- } +- +- // Note, that we copy name to user space via temporary +- // buffer (local_buf) because filldir will block if +- // user space buffer is swapped out. At that time +- // entry can move to somewhere else +- memcpy(local_buf, d_name, d_reclen); +- +- /* the filldir function might need to start transactions, +- * or do who knows what. 
Release the path now that we've +- * copied all the important stuff out of the deh +- */ +- pathrelse(&path_to_entry); +- +- if (filldir(dirent, local_buf, d_reclen, d_off, d_ino, +- DT_UNKNOWN) < 0) { +- if (local_buf != small_buf) { +- kfree(local_buf); +- } +- goto end; +- } +- if (local_buf != small_buf) { +- kfree(local_buf); +- } +- } /* while */ +- +- end: +- pathrelse(&path_to_entry); +- return 0; +-} +- +-/* +- * this could be done with dedicated readdir ops for the xattr files, +- * but I want to get something working asap +- * this is stolen from vfs_readdir +- * +- */ +-static +-int xattr_readdir(struct inode *inode, filldir_t filler, void *buf) +-{ +- int res = -ENOENT; +- if (!IS_DEADDIR(inode)) { +- lock_kernel(); +- res = __xattr_readdir(inode, buf, filler); +- unlock_kernel(); +- } +- return res; +-} +- + /* The following are side effects of other operations that aren't explicitly + * modifying extended attributes. This includes operations such as permissions + * or ownership changes, object deletions, etc. */ ++struct reiserfs_dentry_buf { ++ struct dentry *xadir; ++ int count; ++ struct dentry *dentries[8]; ++}; + + static int +-reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen, +- loff_t offset, u64 ino, unsigned int d_type) ++fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, ++ u64 ino, unsigned int d_type) + { +- struct dentry *xadir = (struct dentry *)buf; ++ struct reiserfs_dentry_buf *dbuf = buf; + struct dentry *dentry; +- int err = 0; + +- dentry = lookup_one_len(name, xadir, namelen); ++ if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) ++ return -ENOSPC; ++ ++ if (name[0] == '.' && (name[1] == '\0' || ++ (name[1] == '.' 
&& name[2] == '\0'))) ++ return 0; ++ ++ dentry = lookup_one_len(name, dbuf->xadir, namelen); + if (IS_ERR(dentry)) { +- err = PTR_ERR(dentry); +- goto out; ++ return PTR_ERR(dentry); + } else if (!dentry->d_inode) { +- err = -ENODATA; +- goto out_file; ++ /* A directory entry exists, but no file? */ ++ reiserfs_error(dentry->d_sb, "xattr-20003", ++ "Corrupted directory: xattr %s listed but " ++ "not found for file %s.\n", ++ dentry->d_name.name, dbuf->xadir->d_name.name); ++ dput(dentry); ++ return -EIO; + } + +- /* Skip directories.. */ +- if (S_ISDIR(dentry->d_inode->i_mode)) +- goto out_file; +- +- err = xattr_unlink(xadir->d_inode, dentry); +- +-out_file: +- dput(dentry); ++ dbuf->dentries[dbuf->count++] = dentry; ++ return 0; ++} + +-out: +- return err; ++static void ++cleanup_dentry_buf(struct reiserfs_dentry_buf *buf) ++{ ++ int i; ++ for (i = 0; i < buf->count; i++) ++ if (buf->dentries[i]) ++ dput(buf->dentries[i]); + } + +-/* This is called w/ inode->i_mutex downed */ +-int reiserfs_delete_xattrs(struct inode *inode) ++static int reiserfs_for_each_xattr(struct inode *inode, ++ int (*action)(struct dentry *, void *), ++ void *data) + { +- int err = -ENODATA; +- struct dentry *dir, *root; +- struct reiserfs_transaction_handle th; +- int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + +- 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); ++ struct dentry *dir; ++ int i, err = 0; ++ loff_t pos = 0; ++ struct reiserfs_dentry_buf buf = { ++ .count = 0, ++ }; + + /* Skip out, an xattr has no xattrs associated with it */ + if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1) +@@ -389,117 +236,97 @@ int reiserfs_delete_xattrs(struct inode + err = PTR_ERR(dir); + goto out; + } else if (!dir->d_inode) { +- dput(dir); +- goto out; ++ err = 0; ++ goto out_dir; + } + + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); +- err = xattr_readdir(dir->d_inode, reiserfs_delete_xattrs_filler, dir); +- mutex_unlock(&dir->d_inode->i_mutex); +- if (err) { +- 
dput(dir); +- goto out; ++ buf.xadir = dir; ++ err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos); ++ while ((err == 0 || err == -ENOSPC) && buf.count) { ++ err = 0; ++ ++ for (i = 0; i < buf.count && buf.dentries[i]; i++) { ++ int lerr = 0; ++ struct dentry *dentry = buf.dentries[i]; ++ ++ if (err == 0 && !S_ISDIR(dentry->d_inode->i_mode)) ++ lerr = action(dentry, data); ++ ++ dput(dentry); ++ buf.dentries[i] = NULL; ++ err = lerr ?: err; ++ } ++ buf.count = 0; ++ if (!err) ++ err = reiserfs_readdir_dentry(dir, &buf, ++ fill_with_dentries, &pos); + } ++ mutex_unlock(&dir->d_inode->i_mutex); + +- root = dget(dir->d_parent); +- dput(dir); ++ /* Clean up after a failed readdir */ ++ cleanup_dentry_buf(&buf); + +- /* We start a transaction here to avoid a ABBA situation +- * between the xattr root's i_mutex and the journal lock. +- * Inode creation will inherit an ACL, which requires a +- * lookup. The lookup locks the xattr root i_mutex with a +- * transaction open. Inode deletion takes teh xattr root +- * i_mutex to delete the directory and then starts a +- * transaction inside it. Boom. This doesn't incur much +- * additional overhead since the reiserfs_rmdir transaction +- * will just nest inside the outer transaction. */ +- err = journal_begin(&th, inode->i_sb, blocks); + if (!err) { +- int jerror; +- mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_XATTR); +- err = xattr_rmdir(root->d_inode, dir); +- jerror = journal_end(&th, inode->i_sb, blocks); +- mutex_unlock(&root->d_inode->i_mutex); +- err = jerror ?: err; ++ /* We start a transaction here to avoid a ABBA situation ++ * between the xattr root's i_mutex and the journal lock. ++ * This doesn't incur much additional overhead since the ++ * new transaction will just nest inside the ++ * outer transaction. 
*/ ++ int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + ++ 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); ++ struct reiserfs_transaction_handle th; ++ err = journal_begin(&th, inode->i_sb, blocks); ++ if (!err) { ++ int jerror; ++ mutex_lock_nested(&dir->d_parent->d_inode->i_mutex, ++ I_MUTEX_XATTR); ++ err = action(dir, data); ++ jerror = journal_end(&th, inode->i_sb, blocks); ++ mutex_unlock(&dir->d_parent->d_inode->i_mutex); ++ err = jerror ?: err; ++ } + } +- +- dput(root); ++out_dir: ++ dput(dir); + out: +- if (err) +- reiserfs_warning(inode->i_sb, "jdm-20004", +- "Couldn't remove all xattrs (%d)\n", err); ++ /* -ENODATA isn't an error */ ++ if (err == -ENODATA) ++ err = 0; + return err; + } + +-struct reiserfs_chown_buf { +- struct inode *inode; +- struct dentry *xadir; +- struct iattr *attrs; +-}; +- +-/* XXX: If there is a better way to do this, I'd love to hear about it */ +-static int +-reiserfs_chown_xattrs_filler(void *buf, const char *name, int namelen, +- loff_t offset, u64 ino, unsigned int d_type) ++static int delete_one_xattr(struct dentry *dentry, void *data) + { +- struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf; +- struct dentry *xafile, *xadir = chown_buf->xadir; +- struct iattr *attrs = chown_buf->attrs; +- int err = 0; ++ struct inode *dir = dentry->d_parent->d_inode; + +- xafile = lookup_one_len(name, xadir, namelen); +- if (IS_ERR(xafile)) +- return PTR_ERR(xafile); +- else if (!xafile->d_inode) { +- dput(xafile); +- return -ENODATA; +- } ++ /* This is the xattr dir, handle specially. 
*/ ++ if (S_ISDIR(dentry->d_inode->i_mode)) ++ return xattr_rmdir(dir, dentry); + +- if (!S_ISDIR(xafile->d_inode->i_mode)) { +- mutex_lock_nested(&xafile->d_inode->i_mutex, I_MUTEX_CHILD); +- err = reiserfs_setattr(xafile, attrs); +- mutex_unlock(&xafile->d_inode->i_mutex); +- } +- dput(xafile); ++ return xattr_unlink(dir, dentry); ++} ++ ++static int chown_one_xattr(struct dentry *dentry, void *data) ++{ ++ struct iattr *attrs = data; ++ return reiserfs_setattr(dentry, attrs); ++} + ++/* No i_mutex, but the inode is unconnected. */ ++int reiserfs_delete_xattrs(struct inode *inode) ++{ ++ int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL); ++ if (err) ++ reiserfs_warning(inode->i_sb, "jdm-20004", ++ "Couldn't delete all xattrs (%d)\n", err); + return err; + } + ++/* inode->i_mutex: down */ + int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) + { +- struct dentry *dir; +- int err = 0; +- struct reiserfs_chown_buf buf; +- unsigned int ia_valid = attrs->ia_valid; +- +- /* Skip out, an xattr has no xattrs associated with it */ +- if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1) +- return 0; +- +- dir = open_xa_dir(inode, XATTR_REPLACE); +- if (IS_ERR(dir)) { +- if (PTR_ERR(dir) != -ENODATA) +- err = PTR_ERR(dir); +- goto out; +- } else if (!dir->d_inode) +- goto out_dir; +- +- attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME); +- buf.xadir = dir; +- buf.attrs = attrs; +- buf.inode = inode; +- +- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); +- err = xattr_readdir(dir->d_inode, reiserfs_chown_xattrs_filler, &buf); +- +- if (!err) +- err = reiserfs_setattr(dir, attrs); +- mutex_unlock(&dir->d_inode->i_mutex); +- +- attrs->ia_valid = ia_valid; +-out_dir: +- dput(dir); +-out: ++ int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs); + if (err) + reiserfs_warning(inode->i_sb, "jdm-20007", + "Couldn't chown all xattrs (%d)\n", err); +@@ -1004,6 +831,7 @@ ssize_t reiserfs_listxattr(struct dentry + 
{ + struct dentry *dir; + int err = 0; ++ loff_t pos = 0; + struct listxattr_buf buf = { + .inode = dentry->d_inode, + .buf = buffer, +@@ -1026,7 +854,7 @@ ssize_t reiserfs_listxattr(struct dentry + } + + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); +- err = xattr_readdir(dir->d_inode, listxattr_filler, &buf); ++ err = reiserfs_readdir_dentry(dir, &buf, listxattr_filler, &pos); + mutex_unlock(&dir->d_inode->i_mutex); + + if (!err) +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -1984,6 +1984,7 @@ extern const struct inode_operations rei + extern const struct inode_operations reiserfs_symlink_inode_operations; + extern const struct inode_operations reiserfs_special_inode_operations; + extern const struct file_operations reiserfs_dir_operations; ++int reiserfs_readdir_dentry(struct dentry *, void *, filldir_t, loff_t *); + + /* tail_conversion.c */ + int direct2indirect(struct reiserfs_transaction_handle *, struct inode *, diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-make-per-inode-xattr-locking-more-fine-grained.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-make-per-inode-xattr-locking-more-fine-grained.diff new file mode 100644 index 0000000000..c1b51a456f --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-make-per-inode-xattr-locking-more-fine-grained.diff @@ -0,0 +1,421 @@ +From: Jeff Mahoney +Subject: reiserfs: make per-inode xattr locking more fine grained + + The per-inode locking can be made more fine-grained to surround just the + interaction with the filesystem itself. This really only applies to protecting + reads during a write, since concurrent writes are barred with inode->i_mutex + at the vfs level. 
+ +Signed-off-by: Jeff Mahoney + +-- + fs/reiserfs/xattr.c | 114 +++++++++++++++++++---------------------- + fs/reiserfs/xattr_acl.c | 7 -- + include/linux/reiserfs_fs_i.h | 2 + include/linux/reiserfs_xattr.h | 22 ------- + 4 files changed, 55 insertions(+), 90 deletions(-) + +--- a/fs/reiserfs/xattr_acl.c ++++ b/fs/reiserfs/xattr_acl.c +@@ -418,9 +418,7 @@ int reiserfs_cache_default_acl(struct in + int ret = 0; + if (reiserfs_posixacl(inode->i_sb) && !IS_PRIVATE(inode)) { + struct posix_acl *acl; +- reiserfs_read_lock_xattr_i(inode); + acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); +- reiserfs_read_unlock_xattr_i(inode); + ret = (acl && !IS_ERR(acl)); + if (ret) + posix_acl_release(acl); +@@ -452,11 +450,8 @@ int reiserfs_acl_chmod(struct inode *ino + if (!clone) + return -ENOMEM; + error = posix_acl_chmod_masq(clone, inode->i_mode); +- if (!error) { +- reiserfs_write_lock_xattr_i(inode); ++ if (!error) + error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone); +- reiserfs_write_unlock_xattr_i(inode); +- } + posix_acl_release(clone); + return error; + } +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -29,10 +29,8 @@ + * to the inode so that unnecessary lookups are avoided. + * + * Locking works like so: +- * The xattr root (/.reiserfs_priv/xattrs) is protected by its i_mutex. +- * The xattr dir (/.reiserfs_priv/xattrs/.) is protected by +- * inode->xattr_sem. +- * The xattrs themselves are likewise protected by the xattr_sem. ++ * Directory components (xattr root, xattr dir) are protectd by their i_mutex. ++ * The xattrs themselves are protected by the xattr_sem. + */ + + #include +@@ -55,6 +53,8 @@ + #define PRIVROOT_NAME ".reiserfs_priv" + #define XAROOT_NAME "xattrs" + ++static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char *); ++ + /* Helpers for inode ops. We do this so that we don't have all the VFS + * overhead and also for proper i_mutex annotation. + * dir->i_mutex must be held for all of them. 
*/ +@@ -339,12 +339,14 @@ int xattr_readdir(struct inode *inode, f + return res; + } + ++/* expects xadir->d_inode->i_mutex to be locked */ + static int + __reiserfs_xattr_del(struct dentry *xadir, const char *name, int namelen) + { + struct dentry *dentry; + struct inode *dir = xadir->d_inode; + int err = 0; ++ struct reiserfs_xattr_handler *xah; + + dentry = lookup_one_len(name, xadir, namelen); + if (IS_ERR(dentry)) { +@@ -372,6 +374,14 @@ __reiserfs_xattr_del(struct dentry *xadi + return -EIO; + } + ++ /* Deletion pre-operation */ ++ xah = find_xattr_handler_prefix(name); ++ if (xah && xah->del) { ++ err = xah->del(dentry->d_inode, name); ++ if (err) ++ goto out; ++ } ++ + err = xattr_unlink(dir, dentry); + + out_file: +@@ -398,7 +408,7 @@ reiserfs_delete_xattrs_filler(void *buf, + /* This is called w/ inode->i_mutex downed */ + int reiserfs_delete_xattrs(struct inode *inode) + { +- int err = 0; ++ int err = -ENODATA; + struct dentry *dir, *root; + struct reiserfs_transaction_handle th; + int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + +@@ -414,14 +424,19 @@ int reiserfs_delete_xattrs(struct inode + goto out; + } else if (!dir->d_inode) { + dput(dir); +- return 0; ++ goto out; + } + + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); + err = xattr_readdir(dir->d_inode, reiserfs_delete_xattrs_filler, dir); + mutex_unlock(&dir->d_inode->i_mutex); +- if (err) +- goto out_dir; ++ if (err) { ++ dput(dir); ++ goto out; ++ } ++ ++ root = dget(dir->d_parent); ++ dput(dir); + + /* We start a transaction here to avoid a ABBA situation + * between the xattr root's i_mutex and the journal lock. 
+@@ -435,19 +450,14 @@ int reiserfs_delete_xattrs(struct inode + err = journal_begin(&th, inode->i_sb, blocks); + if (!err) { + int jerror; +- root = dget(dir->d_parent); + mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_XATTR); + err = xattr_rmdir(root->d_inode, dir); + jerror = journal_end(&th, inode->i_sb, blocks); + mutex_unlock(&root->d_inode->i_mutex); +- dput(root); +- + err = jerror ?: err; + } + +-out_dir: +- dput(dir); +- ++ dput(root); + out: + if (!err) + REISERFS_I(inode)->i_flags = +@@ -484,7 +494,7 @@ reiserfs_chown_xattrs_filler(void *buf, + + if (!S_ISDIR(xafile->d_inode->i_mode)) { + mutex_lock_nested(&xafile->d_inode->i_mutex, I_MUTEX_CHILD); +- err = notify_change(xafile, attrs); ++ err = reiserfs_setattr(xafile, attrs); + mutex_unlock(&xafile->d_inode->i_mutex); + } + dput(xafile); +@@ -520,13 +530,16 @@ int reiserfs_chown_xattrs(struct inode * + err = xattr_readdir(dir->d_inode, reiserfs_chown_xattrs_filler, &buf); + + if (!err) +- err = notify_change(dir, attrs); ++ err = reiserfs_setattr(dir, attrs); + mutex_unlock(&dir->d_inode->i_mutex); + + attrs->ia_valid = ia_valid; + out_dir: + dput(dir); + out: ++ if (err) ++ reiserfs_warning(inode->i_sb, "jdm-20007", ++ "Couldn't chown all xattrs (%d)\n", err); + return err; + } + +@@ -635,9 +648,8 @@ reiserfs_xattr_set(struct inode *inode, + if (get_inode_sd_version(inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + +- /* Empty xattrs are ok, they're just empty files, no hash */ +- if (buffer && buffer_size) +- xahash = xattr_hash(buffer, buffer_size); ++ if (!buffer) ++ return reiserfs_xattr_del(inode, name); + + dentry = get_xa_file_dentry(inode, name, flags); + if (IS_ERR(dentry)) { +@@ -645,13 +657,19 @@ reiserfs_xattr_set(struct inode *inode, + goto out; + } + ++ down_write(&REISERFS_I(inode)->i_xattr_sem); ++ ++ xahash = xattr_hash(buffer, buffer_size); + REISERFS_I(inode)->i_flags |= i_has_xattr_dir; + + /* Resize it so we're ok to write there */ + newattrs.ia_size = buffer_size; ++ 
newattrs.ia_ctime = current_fs_time(inode->i_sb); + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); +- err = notify_change(dentry, &newattrs); ++ down_write(&dentry->d_inode->i_alloc_sem); ++ err = reiserfs_setattr(dentry, &newattrs); ++ up_write(&dentry->d_inode->i_alloc_sem); + mutex_unlock(&dentry->d_inode->i_mutex); + if (err) + goto out_filp; +@@ -712,6 +730,7 @@ reiserfs_xattr_set(struct inode *inode, + } + + out_filp: ++ up_write(&REISERFS_I(inode)->i_xattr_sem); + dput(dentry); + + out: +@@ -747,10 +766,7 @@ reiserfs_xattr_get(const struct inode *i + goto out; + } + +- /* protect against concurrent access. xattrs are backed by +- * regular files, but they're not regular files. The updates +- * must be atomic from the perspective of the user. */ +- mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); ++ down_read(&REISERFS_I(inode)->i_xattr_sem); + + isize = i_size_read(dentry->d_inode); + REISERFS_I(inode)->i_flags |= i_has_xattr_dir; +@@ -758,12 +774,12 @@ reiserfs_xattr_get(const struct inode *i + /* Just return the size needed */ + if (buffer == NULL) { + err = isize - sizeof(struct reiserfs_xattr_header); +- goto out_dput; ++ goto out_unlock; + } + + if (buffer_size < isize - sizeof(struct reiserfs_xattr_header)) { + err = -ERANGE; +- goto out_dput; ++ goto out_unlock; + } + + while (file_pos < isize) { +@@ -778,7 +794,7 @@ reiserfs_xattr_get(const struct inode *i + page = reiserfs_get_page(dentry->d_inode, file_pos); + if (IS_ERR(page)) { + err = PTR_ERR(page); +- goto out_dput; ++ goto out_unlock; + } + + lock_page(page); +@@ -797,7 +813,7 @@ reiserfs_xattr_get(const struct inode *i + "associated with %k", name, + INODE_PKEY(inode)); + err = -EIO; +- goto out_dput; ++ goto out_unlock; + } + hash = le32_to_cpu(rxh->h_hash); + } +@@ -818,8 +834,8 @@ reiserfs_xattr_get(const struct inode *i + err = -EIO; + } + +-out_dput: +- mutex_unlock(&dentry->d_inode->i_mutex); ++out_unlock: ++ 
up_read(&REISERFS_I(inode)->i_xattr_sem); + dput(dentry); + + out: +@@ -852,8 +868,6 @@ int reiserfs_xattr_del(struct inode *ino + } + + /* Actual operations that are exported to VFS-land */ +- +-static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char *); + /* + * Inode operation getxattr() + */ +@@ -868,9 +882,7 @@ reiserfs_getxattr(struct dentry * dentry + get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + +- reiserfs_read_lock_xattr_i(dentry->d_inode); + err = xah->get(dentry->d_inode, name, buffer, size); +- reiserfs_read_unlock_xattr_i(dentry->d_inode); + return err; + } + +@@ -890,9 +902,7 @@ reiserfs_setxattr(struct dentry *dentry, + get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + +- reiserfs_write_lock_xattr_i(dentry->d_inode); + err = xah->set(dentry->d_inode, name, value, size, flags); +- reiserfs_write_unlock_xattr_i(dentry->d_inode); + return err; + } + +@@ -910,21 +920,11 @@ int reiserfs_removexattr(struct dentry * + get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + +- reiserfs_write_lock_xattr_i(dentry->d_inode); +- /* Deletion pre-operation */ +- if (xah->del) { +- err = xah->del(dentry->d_inode, name); +- if (err) +- goto out; +- } +- + err = reiserfs_xattr_del(dentry->d_inode, name); + + dentry->d_inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(dentry->d_inode); + +- out: +- reiserfs_write_unlock_xattr_i(dentry->d_inode); + return err; + } + +@@ -986,7 +986,6 @@ ssize_t reiserfs_listxattr(struct dentry + get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + +- reiserfs_read_lock_xattr_i(dentry->d_inode); + dir = open_xa_dir(dentry->d_inode, XATTR_REPLACE); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); +@@ -1005,19 +1004,16 @@ ssize_t reiserfs_listxattr(struct dentry + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); + err = xattr_readdir(dir->d_inode, reiserfs_listxattr_filler, &buf); + 
mutex_unlock(&dir->d_inode->i_mutex); +- if (err) +- goto out_dir; + +- if (buf.r_pos > buf.r_size && buffer != NULL) +- err = -ERANGE; +- else +- err = buf.r_pos; ++ if (!err) { ++ if (buf.r_pos > buf.r_size && buffer != NULL) ++ err = -ERANGE; ++ else ++ err = buf.r_pos; ++ } + +- out_dir: + dput(dir); +- +- out: +- reiserfs_read_unlock_xattr_i(dentry->d_inode); ++out: + return err; + } + +@@ -1115,12 +1111,8 @@ static int reiserfs_check_acl(struct ino + struct posix_acl *acl; + int error = -EAGAIN; /* do regular unix permission checks by default */ + +- reiserfs_read_lock_xattr_i(inode); +- + acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); + +- reiserfs_read_unlock_xattr_i(inode); +- + if (acl) { + if (!IS_ERR(acl)) { + error = posix_acl_permission(inode, acl, mask); +--- a/include/linux/reiserfs_fs_i.h ++++ b/include/linux/reiserfs_fs_i.h +@@ -59,7 +59,7 @@ struct reiserfs_inode_info { + struct posix_acl *i_acl_default; + #endif + #ifdef CONFIG_REISERFS_FS_XATTR +- struct rw_semaphore xattr_sem; ++ struct rw_semaphore i_xattr_sem; + #endif + struct inode vfs_inode; + }; +--- a/include/linux/reiserfs_xattr.h ++++ b/include/linux/reiserfs_xattr.h +@@ -67,24 +67,6 @@ extern struct reiserfs_xattr_handler use + extern struct reiserfs_xattr_handler trusted_handler; + extern struct reiserfs_xattr_handler security_handler; + +-static inline void reiserfs_write_lock_xattr_i(struct inode *inode) +-{ +- down_write(&REISERFS_I(inode)->i_xattr_sem); +-} +-static inline void reiserfs_write_unlock_xattr_i(struct inode *inode) +-{ +- up_write(&REISERFS_I(inode)->i_xattr_sem); +-} +-static inline void reiserfs_read_lock_xattr_i(struct inode *inode) +-{ +- down_read(&REISERFS_I(inode)->i_xattr_sem); +-} +- +-static inline void reiserfs_read_unlock_xattr_i(struct inode *inode) +-{ +- up_read(&REISERFS_I(inode)->i_xattr_sem); +-} +- + static inline void reiserfs_init_xattr_rwsem(struct inode *inode) + { + init_rwsem(&REISERFS_I(inode)->i_xattr_sem); +@@ -96,10 +78,6 @@ static 
inline void reiserfs_init_xattr_r + #define reiserfs_setxattr NULL + #define reiserfs_listxattr NULL + #define reiserfs_removexattr NULL +-#define reiserfs_write_lock_xattrs(sb) do {;} while(0) +-#define reiserfs_write_unlock_xattrs(sb) do {;} while(0) +-#define reiserfs_read_lock_xattrs(sb) +-#define reiserfs_read_unlock_xattrs(sb) + + #define reiserfs_permission NULL + diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rearrange-journal-abort.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rearrange-journal-abort.diff new file mode 100644 index 0000000000..f7e9b58478 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rearrange-journal-abort.diff @@ -0,0 +1,80 @@ +From: Jeff Mahoney +Subject: reiserfs: rearrange journal abort + + This patch kills off reiserfs_journal_abort as it is never called, and + combines __reiserfs_journal_abort_{soft,hard} into one function called + reiserfs_abort_journal, which performs the same work. It is silent + as opposed to the old version, since the message was always issued + after a regular 'abort' message. 
+ +Signed-off-by: Jeff Mahoney + +--- + fs/reiserfs/journal.c | 23 ++++------------------- + fs/reiserfs/prints.c | 2 +- + include/linux/reiserfs_fs.h | 2 +- + 3 files changed, 6 insertions(+), 21 deletions(-) + +--- a/fs/reiserfs/journal.c ++++ b/fs/reiserfs/journal.c +@@ -4292,14 +4292,15 @@ static int do_journal_end(struct reiserf + return journal->j_errno; + } + +-static void __reiserfs_journal_abort_hard(struct super_block *sb) ++/* Send the file system read only and refuse new transactions */ ++void reiserfs_abort_journal(struct super_block *sb, int errno) + { + struct reiserfs_journal *journal = SB_JOURNAL(sb); + if (test_bit(J_ABORTED, &journal->j_state)) + return; + +- printk(KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n", +- reiserfs_bdevname(sb)); ++ if (!journal->j_errno) ++ journal->j_errno = errno; + + sb->s_flags |= MS_RDONLY; + set_bit(J_ABORTED, &journal->j_state); +@@ -4309,19 +4310,3 @@ static void __reiserfs_journal_abort_har + #endif + } + +-static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno) +-{ +- struct reiserfs_journal *journal = SB_JOURNAL(sb); +- if (test_bit(J_ABORTED, &journal->j_state)) +- return; +- +- if (!journal->j_errno) +- journal->j_errno = errno; +- +- __reiserfs_journal_abort_hard(sb); +-} +- +-void reiserfs_journal_abort(struct super_block *sb, int errno) +-{ +- __reiserfs_journal_abort_soft(sb, errno); +-} +--- a/fs/reiserfs/prints.c ++++ b/fs/reiserfs/prints.c +@@ -389,7 +389,7 @@ void reiserfs_abort(struct super_block * + error_buf); + + sb->s_flags |= MS_RDONLY; +- reiserfs_journal_abort(sb, errno); ++ reiserfs_abort_journal(sb, errno); + } + + /* this prints internal nodes (4 keys/items in line) (dc_number, +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -1771,7 +1771,7 @@ int journal_begin(struct reiserfs_transa + struct super_block *p_s_sb, unsigned long); + int journal_join_abort(struct reiserfs_transaction_handle *, + struct super_block *p_s_sb, 
unsigned long); +-void reiserfs_journal_abort(struct super_block *sb, int errno); ++void reiserfs_abort_journal(struct super_block *sb, int errno); + void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); + int reiserfs_allocate_list_bitmaps(struct super_block *s, + struct reiserfs_list_bitmap *, unsigned int); diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-reiserfs-warning.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-reiserfs-warning.diff new file mode 100644 index 0000000000..6c17090ccf --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-reiserfs-warning.diff @@ -0,0 +1,2341 @@ +From: Jeff Mahoney +Subject: reiserfs: rework reiserfs_warning + + ReiserFS warnings can be somewhat inconsistent. + In some cases: + * a unique identifier may be associated with it + * the function name may be included + * the device may be printed separately + + This patch aims to make warnings more consistent. reiserfs_warning() prints + the device name, so printing it a second time is not required. The function + name for a warning is always helpful in debugging, so it is now automatically + inserted into the output. Hans has stated that every warning should have + a unique identifier. Some cases lack them, others really shouldn't have them. + reiserfs_warning() now expects an id associated with each message. In the + rare case where one isn't needed, "" will suffice. 
+ +Signed-off-by: Jeff Mahoney + +-- + fs/reiserfs/bitmap.c | 52 +++----- + fs/reiserfs/do_balan.c | 40 +++--- + fs/reiserfs/file.c | 2 + fs/reiserfs/fix_node.c | 14 +- + fs/reiserfs/inode.c | 60 ++++----- + fs/reiserfs/item_ops.c | 60 +++++---- + fs/reiserfs/journal.c | 174 +++++++++++++++------------- + fs/reiserfs/lbalance.c | 12 + + fs/reiserfs/namei.c | 45 +++---- + fs/reiserfs/objectid.c | 5 + fs/reiserfs/prints.c | 11 + + fs/reiserfs/procfs.c | 5 + fs/reiserfs/stree.c | 107 ++++++++--------- + fs/reiserfs/super.c | 257 ++++++++++++++++++++++-------------------- + fs/reiserfs/tail_conversion.c | 6 + fs/reiserfs/xattr.c | 21 ++- + include/linux/reiserfs_fs.h | 9 - + 17 files changed, 454 insertions(+), 426 deletions(-) + +--- a/fs/reiserfs/bitmap.c ++++ b/fs/reiserfs/bitmap.c +@@ -64,8 +64,8 @@ int is_reusable(struct super_block *s, b + unsigned int bmap_count = reiserfs_bmap_count(s); + + if (block == 0 || block >= SB_BLOCK_COUNT(s)) { +- reiserfs_warning(s, +- "vs-4010: is_reusable: block number is out of range %lu (%u)", ++ reiserfs_warning(s, "vs-4010", ++ "block number is out of range %lu (%u)", + block, SB_BLOCK_COUNT(s)); + return 0; + } +@@ -79,30 +79,29 @@ int is_reusable(struct super_block *s, b + b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; + if (block >= bmap1 && + block <= bmap1 + bmap_count) { +- reiserfs_warning(s, "vs: 4019: is_reusable: " +- "bitmap block %lu(%u) can't be freed or reused", ++ reiserfs_warning(s, "vs-4019", "bitmap block %lu(%u) " ++ "can't be freed or reused", + block, bmap_count); + return 0; + } + } else { + if (offset == 0) { +- reiserfs_warning(s, "vs: 4020: is_reusable: " +- "bitmap block %lu(%u) can't be freed or reused", ++ reiserfs_warning(s, "vs-4020", "bitmap block %lu(%u) " ++ "can't be freed or reused", + block, bmap_count); + return 0; + } + } + + if (bmap >= bmap_count) { +- reiserfs_warning(s, +- "vs-4030: is_reusable: there is no so many bitmap blocks: " +- "block=%lu, bitmap_nr=%u", block, bmap); 
++ reiserfs_warning(s, "vs-4030", "bitmap for requested block " ++ "is out of range: block=%lu, bitmap_nr=%u", ++ block, bmap); + return 0; + } + + if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) { +- reiserfs_warning(s, +- "vs-4050: is_reusable: this is root block (%u), " ++ reiserfs_warning(s, "vs-4050", "this is root block (%u), " + "it must be busy", SB_ROOT_BLOCK(s)); + return 0; + } +@@ -154,8 +153,8 @@ static int scan_bitmap_block(struct reis + /* - I mean `a window of zero bits' as in description of this function - Zam. */ + + if (!bi) { +- reiserfs_warning(s, "NULL bitmap info pointer for bitmap %d", +- bmap_n); ++ reiserfs_warning(s, "jdm-4055", "NULL bitmap info pointer " ++ "for bitmap %d", bmap_n); + return 0; + } + +@@ -400,11 +399,8 @@ static void _reiserfs_free_block(struct + get_bit_address(s, block, &nr, &offset); + + if (nr >= reiserfs_bmap_count(s)) { +- reiserfs_warning(s, "vs-4075: reiserfs_free_block: " +- "block %lu is out of range on %s " +- "(nr=%u,max=%u)", block, +- reiserfs_bdevname(s), nr, +- reiserfs_bmap_count(s)); ++ reiserfs_warning(s, "vs-4075", "block %lu is out of range", ++ block); + return; + } + +@@ -416,9 +412,8 @@ static void _reiserfs_free_block(struct + + /* clear bit for the given block in bit map */ + if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) { +- reiserfs_warning(s, "vs-4080: reiserfs_free_block: " +- "free_block (%s:%lu)[dev:blocknr]: bit already cleared", +- reiserfs_bdevname(s), block); ++ reiserfs_warning(s, "vs-4080", ++ "block %lu: bit already cleared", block); + } + apbi[nr].free_count++; + journal_mark_dirty(th, s, bmbh); +@@ -477,9 +472,8 @@ static void __discard_prealloc(struct re + BUG_ON(!th->t_trans_id); + #ifdef CONFIG_REISERFS_CHECK + if (ei->i_prealloc_count < 0) +- reiserfs_warning(th->t_super, +- "zam-4001:%s: inode has negative prealloc blocks count.", +- __func__); ++ reiserfs_warning(th->t_super, "zam-4001", ++ "inode has negative prealloc blocks count."); + #endif + while 
(ei->i_prealloc_count > 0) { + reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); +@@ -515,9 +509,9 @@ void reiserfs_discard_all_prealloc(struc + i_prealloc_list); + #ifdef CONFIG_REISERFS_CHECK + if (!ei->i_prealloc_count) { +- reiserfs_warning(th->t_super, +- "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.", +- __func__); ++ reiserfs_warning(th->t_super, "zam-4001", ++ "inode is in prealloc list but has " ++ "no preallocated blocks."); + } + #endif + __discard_prealloc(th, ei); +@@ -631,8 +625,8 @@ int reiserfs_parse_alloc_options(struct + continue; + } + +- reiserfs_warning(s, "zam-4001: %s : unknown option - %s", +- __func__, this_char); ++ reiserfs_warning(s, "zam-4001", "unknown option - %s", ++ this_char); + return 1; + } + +--- a/fs/reiserfs/do_balan.c ++++ b/fs/reiserfs/do_balan.c +@@ -1752,15 +1752,16 @@ static void store_thrown(struct tree_bal + int i; + + if (buffer_dirty(bh)) +- reiserfs_warning(tb->tb_sb, +- "store_thrown deals with dirty buffer"); ++ reiserfs_warning(tb->tb_sb, "reiserfs-12320", ++ "called with dirty buffer"); + for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) + if (!tb->thrown[i]) { + tb->thrown[i] = bh; + get_bh(bh); /* free_thrown puts this */ + return; + } +- reiserfs_warning(tb->tb_sb, "store_thrown: too many thrown buffers"); ++ reiserfs_warning(tb->tb_sb, "reiserfs-12321", ++ "too many thrown buffers"); + } + + static void free_thrown(struct tree_balance *tb) +@@ -1771,8 +1772,8 @@ static void free_thrown(struct tree_bala + if (tb->thrown[i]) { + blocknr = tb->thrown[i]->b_blocknr; + if (buffer_dirty(tb->thrown[i])) +- reiserfs_warning(tb->tb_sb, +- "free_thrown deals with dirty buffer %d", ++ reiserfs_warning(tb->tb_sb, "reiserfs-12322", ++ "called with dirty buffer %d", + blocknr); + brelse(tb->thrown[i]); /* incremented in store_thrown */ + reiserfs_free_block(tb->transaction_handle, NULL, +@@ -1877,13 +1878,12 @@ static void check_internal_node(struct s + } + } + +-static int 
locked_or_not_in_tree(struct buffer_head *bh, char *which) ++static int locked_or_not_in_tree(struct tree_balance *tb, ++ struct buffer_head *bh, char *which) + { + if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) || + !B_IS_IN_TREE(bh)) { +- reiserfs_warning(NULL, +- "vs-12339: locked_or_not_in_tree: %s (%b)", +- which, bh); ++ reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh); + return 1; + } + return 0; +@@ -1902,18 +1902,19 @@ static int check_before_balancing(struct + /* double check that buffers that we will modify are unlocked. (fix_nodes should already have + prepped all of these for us). */ + if (tb->lnum[0]) { +- retval |= locked_or_not_in_tree(tb->L[0], "L[0]"); +- retval |= locked_or_not_in_tree(tb->FL[0], "FL[0]"); +- retval |= locked_or_not_in_tree(tb->CFL[0], "CFL[0]"); ++ retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]"); ++ retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]"); ++ retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]"); + check_leaf(tb->L[0]); + } + if (tb->rnum[0]) { +- retval |= locked_or_not_in_tree(tb->R[0], "R[0]"); +- retval |= locked_or_not_in_tree(tb->FR[0], "FR[0]"); +- retval |= locked_or_not_in_tree(tb->CFR[0], "CFR[0]"); ++ retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]"); ++ retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]"); ++ retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]"); + check_leaf(tb->R[0]); + } +- retval |= locked_or_not_in_tree(PATH_PLAST_BUFFER(tb->tb_path), "S[0]"); ++ retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path), ++ "S[0]"); + check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); + + return retval; +@@ -1952,7 +1953,7 @@ static void check_after_balance_leaf(str + PATH_H_POSITION(tb->tb_path, + 1)))); + print_cur_tb("12223"); +- reiserfs_warning(tb->tb_sb, ++ reiserfs_warning(tb->tb_sb, "reiserfs-12363", + "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; " + "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d", + left, +@@ -2104,9 +2105,8 @@ void 
do_balance(struct tree_balance *tb, + } + /* if we have no real work to do */ + if (!tb->insert_size[0]) { +- reiserfs_warning(tb->tb_sb, +- "PAP-12350: do_balance: insert_size == 0, mode == %c", +- flag); ++ reiserfs_warning(tb->tb_sb, "PAP-12350", ++ "insert_size == 0, mode == %c", flag); + unfix_nodes(tb); + return; + } +--- a/fs/reiserfs/file.c ++++ b/fs/reiserfs/file.c +@@ -76,7 +76,7 @@ static int reiserfs_file_release(struct + * and let the admin know what is going on. + */ + igrab(inode); +- reiserfs_warning(inode->i_sb, ++ reiserfs_warning(inode->i_sb, "clm-9001", + "pinning inode %lu because the " + "preallocation can't be freed", + inode->i_ino); +--- a/fs/reiserfs/fix_node.c ++++ b/fs/reiserfs/fix_node.c +@@ -496,8 +496,8 @@ static int get_num_ver(int mode, struct + snum012[needed_nodes - 1 + 3] = units; + + if (needed_nodes > 2) +- reiserfs_warning(tb->tb_sb, "vs-8111: get_num_ver: " +- "split_item_position is out of boundary"); ++ reiserfs_warning(tb->tb_sb, "vs-8111", ++ "split_item_position is out of range"); + snum012[needed_nodes - 1]++; + split_item_positions[needed_nodes - 1] = i; + needed_nodes++; +@@ -533,8 +533,8 @@ static int get_num_ver(int mode, struct + + if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY && + vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT) +- reiserfs_warning(tb->tb_sb, "vs-8115: get_num_ver: not " +- "directory or indirect item"); ++ reiserfs_warning(tb->tb_sb, "vs-8115", ++ "not directory or indirect item"); + } + + /* now we know S2bytes, calculate S1bytes */ +@@ -2268,9 +2268,9 @@ static int wait_tb_buffers_until_unlocke + #ifdef CONFIG_REISERFS_CHECK + repeat_counter++; + if ((repeat_counter % 10000) == 0) { +- reiserfs_warning(p_s_tb->tb_sb, +- "wait_tb_buffers_until_released(): too many " +- "iterations waiting for buffer to unlock " ++ reiserfs_warning(p_s_tb->tb_sb, "reiserfs-8200", ++ "too many iterations waiting " ++ "for buffer to unlock " + "(%b)", locked); + + /* Don't loop forever. 
Try to recover from possible error. */ +--- a/fs/reiserfs/inode.c ++++ b/fs/reiserfs/inode.c +@@ -842,7 +842,9 @@ int reiserfs_get_block(struct inode *ino + if (retval) { + if (retval != -ENOSPC) + reiserfs_warning(inode->i_sb, +- "clm-6004: convert tail failed inode %lu, error %d", ++ "clm-6004", ++ "convert tail failed " ++ "inode %lu, error %d", + inode->i_ino, + retval); + if (allocated_block_nr) { +@@ -1006,8 +1008,7 @@ int reiserfs_get_block(struct inode *ino + goto failure; + } + if (retval == POSITION_FOUND) { +- reiserfs_warning(inode->i_sb, +- "vs-825: reiserfs_get_block: " ++ reiserfs_warning(inode->i_sb, "vs-825", + "%K should not be found", &key); + retval = -EEXIST; + if (allocated_block_nr) +@@ -1332,9 +1333,9 @@ void reiserfs_update_sd_size(struct reis + /* look for the object's stat data */ + retval = search_item(inode->i_sb, &key, &path); + if (retval == IO_ERROR) { +- reiserfs_warning(inode->i_sb, +- "vs-13050: reiserfs_update_sd: " +- "i/o failure occurred trying to update %K stat data", ++ reiserfs_warning(inode->i_sb, "vs-13050", ++ "i/o failure occurred trying to " ++ "update %K stat data", + &key); + return; + } +@@ -1345,9 +1346,9 @@ void reiserfs_update_sd_size(struct reis + /*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */ + return; + } +- reiserfs_warning(inode->i_sb, +- "vs-13060: reiserfs_update_sd: " +- "stat data of object %k (nlink == %d) not found (pos %d)", ++ reiserfs_warning(inode->i_sb, "vs-13060", ++ "stat data of object %k (nlink == %d) " ++ "not found (pos %d)", + INODE_PKEY(inode), inode->i_nlink, + pos); + reiserfs_check_path(&path); +@@ -1424,10 +1425,9 @@ void reiserfs_read_locked_inode(struct i + /* look for the object's stat data */ + retval = search_item(inode->i_sb, &key, &path_to_sd); + if (retval == IO_ERROR) { +- reiserfs_warning(inode->i_sb, +- "vs-13070: reiserfs_read_locked_inode: " +- "i/o failure occurred trying to find stat data of %K", +- &key); ++ 
reiserfs_warning(inode->i_sb, "vs-13070", ++ "i/o failure occurred trying to find " ++ "stat data of %K", &key); + reiserfs_make_bad_inode(inode); + return; + } +@@ -1457,8 +1457,7 @@ void reiserfs_read_locked_inode(struct i + during mount (fs/reiserfs/super.c:finish_unfinished()). */ + if ((inode->i_nlink == 0) && + !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { +- reiserfs_warning(inode->i_sb, +- "vs-13075: reiserfs_read_locked_inode: " ++ reiserfs_warning(inode->i_sb, "vs-13075", + "dead inode read from disk %K. " + "This is likely to be race with knfsd. Ignore", + &key); +@@ -1564,7 +1563,7 @@ struct dentry *reiserfs_fh_to_dentry(str + */ + if (fh_type > fh_len) { + if (fh_type != 6 || fh_len != 5) +- reiserfs_warning(sb, ++ reiserfs_warning(sb, "reiserfs-13077", + "nfsd/reiserfs, fhtype=%d, len=%d - odd", + fh_type, fh_len); + fh_type = 5; +@@ -1689,13 +1688,13 @@ static int reiserfs_new_directory(struct + /* look for place in the tree for new item */ + retval = search_item(sb, &key, path); + if (retval == IO_ERROR) { +- reiserfs_warning(sb, "vs-13080: reiserfs_new_directory: " ++ reiserfs_warning(sb, "vs-13080", + "i/o failure occurred creating new directory"); + return -EIO; + } + if (retval == ITEM_FOUND) { + pathrelse(path); +- reiserfs_warning(sb, "vs-13070: reiserfs_new_directory: " ++ reiserfs_warning(sb, "vs-13070", + "object with this key exists (%k)", + &(ih->ih_key)); + return -EEXIST; +@@ -1729,13 +1728,13 @@ static int reiserfs_new_symlink(struct r + /* look for place in the tree for new item */ + retval = search_item(sb, &key, path); + if (retval == IO_ERROR) { +- reiserfs_warning(sb, "vs-13080: reiserfs_new_symlinik: " ++ reiserfs_warning(sb, "vs-13080", + "i/o failure occurred creating new symlink"); + return -EIO; + } + if (retval == ITEM_FOUND) { + pathrelse(path); +- reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: " ++ reiserfs_warning(sb, "vs-13080", + "object with this key exists (%k)", + &(ih->ih_key)); + return -EEXIST; +@@ -1932,7 
+1931,8 @@ int reiserfs_new_inode(struct reiserfs_t + goto out_inserted_sd; + } + } else if (inode->i_sb->s_flags & MS_POSIXACL) { +- reiserfs_warning(inode->i_sb, "ACLs aren't enabled in the fs, " ++ reiserfs_warning(inode->i_sb, "jdm-13090", ++ "ACLs aren't enabled in the fs, " + "but vfs thinks they are!"); + } else if (is_reiserfs_priv_object(dir)) { + reiserfs_mark_inode_private(inode); +@@ -2049,8 +2049,8 @@ static int grab_tail_page(struct inode * + ** I've screwed up the code to find the buffer, or the code to + ** call prepare_write + */ +- reiserfs_warning(p_s_inode->i_sb, +- "clm-6000: error reading block %lu on dev %s", ++ reiserfs_warning(p_s_inode->i_sb, "clm-6000", ++ "error reading block %lu on dev %s", + bh->b_blocknr, + reiserfs_bdevname(p_s_inode->i_sb)); + error = -EIO; +@@ -2094,8 +2094,8 @@ int reiserfs_truncate_file(struct inode + // and get_block_create_0 could not find a block to read in, + // which is ok. + if (error != -ENOENT) +- reiserfs_warning(p_s_inode->i_sb, +- "clm-6001: grab_tail_page failed %d", ++ reiserfs_warning(p_s_inode->i_sb, "clm-6001", ++ "grab_tail_page failed %d", + error); + page = NULL; + bh = NULL; +@@ -2213,9 +2213,8 @@ static int map_block_for_writepage(struc + /* we've found an unformatted node */ + if (indirect_item_found(retval, ih)) { + if (bytes_copied > 0) { +- reiserfs_warning(inode->i_sb, +- "clm-6002: bytes_copied %d", +- bytes_copied); ++ reiserfs_warning(inode->i_sb, "clm-6002", ++ "bytes_copied %d", bytes_copied); + } + if (!get_block_num(item, pos_in_item)) { + /* crap, we are writing to a hole */ +@@ -2272,9 +2271,8 @@ static int map_block_for_writepage(struc + goto research; + } + } else { +- reiserfs_warning(inode->i_sb, +- "clm-6003: bad item inode %lu, device %s", +- inode->i_ino, reiserfs_bdevname(inode->i_sb)); ++ reiserfs_warning(inode->i_sb, "clm-6003", ++ "bad item inode %lu", inode->i_ino); + retval = -EIO; + goto out; + } +--- a/fs/reiserfs/item_ops.c ++++ b/fs/reiserfs/item_ops.c +@@ -97,7 
+97,8 @@ static int sd_unit_num(struct virtual_it + + static void sd_print_vi(struct virtual_item *vi) + { +- reiserfs_warning(NULL, "STATDATA, index %d, type 0x%x, %h", ++ reiserfs_warning(NULL, "reiserfs-16100", ++ "STATDATA, index %d, type 0x%x, %h", + vi->vi_index, vi->vi_type, vi->vi_ih); + } + +@@ -190,7 +191,8 @@ static int direct_unit_num(struct virtua + + static void direct_print_vi(struct virtual_item *vi) + { +- reiserfs_warning(NULL, "DIRECT, index %d, type 0x%x, %h", ++ reiserfs_warning(NULL, "reiserfs-16101", ++ "DIRECT, index %d, type 0x%x, %h", + vi->vi_index, vi->vi_type, vi->vi_ih); + } + +@@ -278,7 +280,7 @@ static void indirect_print_item(struct i + unp = (__le32 *) item; + + if (ih_item_len(ih) % UNFM_P_SIZE) +- reiserfs_warning(NULL, "indirect_print_item: invalid item len"); ++ reiserfs_warning(NULL, "reiserfs-16102", "invalid item len"); + + printk("%d pointers\n[ ", (int)I_UNFM_NUM(ih)); + for (j = 0; j < I_UNFM_NUM(ih); j++) { +@@ -334,7 +336,8 @@ static int indirect_unit_num(struct virt + + static void indirect_print_vi(struct virtual_item *vi) + { +- reiserfs_warning(NULL, "INDIRECT, index %d, type 0x%x, %h", ++ reiserfs_warning(NULL, "reiserfs-16103", ++ "INDIRECT, index %d, type 0x%x, %h", + vi->vi_index, vi->vi_type, vi->vi_ih); + } + +@@ -359,7 +362,7 @@ static struct item_operations indirect_o + + static int direntry_bytes_number(struct item_head *ih, int block_size) + { +- reiserfs_warning(NULL, "vs-16090: direntry_bytes_number: " ++ reiserfs_warning(NULL, "vs-16090", + "bytes number is asked for direntry"); + return 0; + } +@@ -614,7 +617,8 @@ static void direntry_print_vi(struct vir + int i; + struct direntry_uarea *dir_u = vi->vi_uarea; + +- reiserfs_warning(NULL, "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x", ++ reiserfs_warning(NULL, "reiserfs-16104", ++ "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x", + vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags); + printk("%d entries: ", dir_u->entry_count); + for (i = 0; i < 
dir_u->entry_count; i++) +@@ -642,43 +646,43 @@ static struct item_operations direntry_o + // + static int errcatch_bytes_number(struct item_head *ih, int block_size) + { +- reiserfs_warning(NULL, +- "green-16001: Invalid item type observed, run fsck ASAP"); ++ reiserfs_warning(NULL, "green-16001", ++ "Invalid item type observed, run fsck ASAP"); + return 0; + } + + static void errcatch_decrement_key(struct cpu_key *key) + { +- reiserfs_warning(NULL, +- "green-16002: Invalid item type observed, run fsck ASAP"); ++ reiserfs_warning(NULL, "green-16002", ++ "Invalid item type observed, run fsck ASAP"); + } + + static int errcatch_is_left_mergeable(struct reiserfs_key *key, + unsigned long bsize) + { +- reiserfs_warning(NULL, +- "green-16003: Invalid item type observed, run fsck ASAP"); ++ reiserfs_warning(NULL, "green-16003", ++ "Invalid item type observed, run fsck ASAP"); + return 0; + } + + static void errcatch_print_item(struct item_head *ih, char *item) + { +- reiserfs_warning(NULL, +- "green-16004: Invalid item type observed, run fsck ASAP"); ++ reiserfs_warning(NULL, "green-16004", ++ "Invalid item type observed, run fsck ASAP"); + } + + static void errcatch_check_item(struct item_head *ih, char *item) + { +- reiserfs_warning(NULL, +- "green-16005: Invalid item type observed, run fsck ASAP"); ++ reiserfs_warning(NULL, "green-16005", ++ "Invalid item type observed, run fsck ASAP"); + } + + static int errcatch_create_vi(struct virtual_node *vn, + struct virtual_item *vi, + int is_affected, int insert_size) + { +- reiserfs_warning(NULL, +- "green-16006: Invalid item type observed, run fsck ASAP"); ++ reiserfs_warning(NULL, "green-16006", ++ "Invalid item type observed, run fsck ASAP"); + return 0; // We might return -1 here as well, but it won't help as create_virtual_node() from where + // this operation is called from is of return type void. 
+ } +@@ -686,36 +690,36 @@ static int errcatch_create_vi(struct vir + static int errcatch_check_left(struct virtual_item *vi, int free, + int start_skip, int end_skip) + { +- reiserfs_warning(NULL, +- "green-16007: Invalid item type observed, run fsck ASAP"); ++ reiserfs_warning(NULL, "green-16007", ++ "Invalid item type observed, run fsck ASAP"); + return -1; + } + + static int errcatch_check_right(struct virtual_item *vi, int free) + { +- reiserfs_warning(NULL, +- "green-16008: Invalid item type observed, run fsck ASAP"); ++ reiserfs_warning(NULL, "green-16008", ++ "Invalid item type observed, run fsck ASAP"); + return -1; + } + + static int errcatch_part_size(struct virtual_item *vi, int first, int count) + { +- reiserfs_warning(NULL, +- "green-16009: Invalid item type observed, run fsck ASAP"); ++ reiserfs_warning(NULL, "green-16009", ++ "Invalid item type observed, run fsck ASAP"); + return 0; + } + + static int errcatch_unit_num(struct virtual_item *vi) + { +- reiserfs_warning(NULL, +- "green-16010: Invalid item type observed, run fsck ASAP"); ++ reiserfs_warning(NULL, "green-16010", ++ "Invalid item type observed, run fsck ASAP"); + return 0; + } + + static void errcatch_print_vi(struct virtual_item *vi) + { +- reiserfs_warning(NULL, +- "green-16011: Invalid item type observed, run fsck ASAP"); ++ reiserfs_warning(NULL, "green-16011", ++ "Invalid item type observed, run fsck ASAP"); + } + + static struct item_operations errcatch_ops = { +--- a/fs/reiserfs/journal.c ++++ b/fs/reiserfs/journal.c +@@ -300,8 +300,8 @@ int reiserfs_allocate_list_bitmaps(struc + jb->journal_list = NULL; + jb->bitmaps = vmalloc(mem); + if (!jb->bitmaps) { +- reiserfs_warning(p_s_sb, +- "clm-2000, unable to allocate bitmaps for journal lists"); ++ reiserfs_warning(p_s_sb, "clm-2000", "unable to " ++ "allocate bitmaps for journal lists"); + failed = 1; + break; + } +@@ -644,8 +644,8 @@ static void reiserfs_end_buffer_io_sync( + char b[BDEVNAME_SIZE]; + + if (buffer_journaled(bh)) { 
+- reiserfs_warning(NULL, +- "clm-2084: pinned buffer %lu:%s sent to disk", ++ reiserfs_warning(NULL, "clm-2084", ++ "pinned buffer %lu:%s sent to disk", + bh->b_blocknr, bdevname(bh->b_bdev, b)); + } + if (uptodate) +@@ -1122,7 +1122,8 @@ static int flush_commit_list(struct supe + sync_dirty_buffer(tbh); + if (unlikely(!buffer_uptodate(tbh))) { + #ifdef CONFIG_REISERFS_CHECK +- reiserfs_warning(s, "journal-601, buffer write failed"); ++ reiserfs_warning(s, "journal-601", ++ "buffer write failed"); + #endif + retval = -EIO; + } +@@ -1154,14 +1155,14 @@ static int flush_commit_list(struct supe + * up propagating the write error out to the filesystem. */ + if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { + #ifdef CONFIG_REISERFS_CHECK +- reiserfs_warning(s, "journal-615: buffer write failed"); ++ reiserfs_warning(s, "journal-615", "buffer write failed"); + #endif + retval = -EIO; + } + bforget(jl->j_commit_bh); + if (journal->j_last_commit_id != 0 && + (jl->j_trans_id - journal->j_last_commit_id) != 1) { +- reiserfs_warning(s, "clm-2200: last commit %lu, current %lu", ++ reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu", + journal->j_last_commit_id, jl->j_trans_id); + } + journal->j_last_commit_id = jl->j_trans_id; +@@ -1250,7 +1251,7 @@ static void remove_all_from_journal_list + while (cn) { + if (cn->blocknr != 0) { + if (debug) { +- reiserfs_warning(p_s_sb, ++ reiserfs_warning(p_s_sb, "reiserfs-2201", + "block %u, bh is %d, state %ld", + cn->blocknr, cn->bh ? 
1 : 0, + cn->state); +@@ -1288,8 +1289,8 @@ static int _update_journal_header_block( + wait_on_buffer((journal->j_header_bh)); + if (unlikely(!buffer_uptodate(journal->j_header_bh))) { + #ifdef CONFIG_REISERFS_CHECK +- reiserfs_warning(p_s_sb, +- "journal-699: buffer write failed"); ++ reiserfs_warning(p_s_sb, "journal-699", ++ "buffer write failed"); + #endif + return -EIO; + } +@@ -1319,8 +1320,8 @@ static int _update_journal_header_block( + sync_dirty_buffer(journal->j_header_bh); + } + if (!buffer_uptodate(journal->j_header_bh)) { +- reiserfs_warning(p_s_sb, +- "journal-837: IO error during journal replay"); ++ reiserfs_warning(p_s_sb, "journal-837", ++ "IO error during journal replay"); + return -EIO; + } + } +@@ -1401,8 +1402,7 @@ static int flush_journal_list(struct sup + BUG_ON(j_len_saved <= 0); + + if (atomic_read(&journal->j_wcount) != 0) { +- reiserfs_warning(s, +- "clm-2048: flush_journal_list called with wcount %d", ++ reiserfs_warning(s, "clm-2048", "called with wcount %d", + atomic_read(&journal->j_wcount)); + } + BUG_ON(jl->j_trans_id == 0); +@@ -1510,8 +1510,8 @@ static int flush_journal_list(struct sup + ** is not marked JDirty_wait + */ + if ((!was_jwait) && !buffer_locked(saved_bh)) { +- reiserfs_warning(s, +- "journal-813: BAD! buffer %llu %cdirty %cjwait, " ++ reiserfs_warning(s, "journal-813", ++ "BAD! buffer %llu %cdirty %cjwait, " + "not in a newer tranasction", + (unsigned long long)saved_bh-> + b_blocknr, was_dirty ? 
' ' : '!', +@@ -1529,8 +1529,8 @@ static int flush_journal_list(struct sup + unlock_buffer(saved_bh); + count++; + } else { +- reiserfs_warning(s, +- "clm-2082: Unable to flush buffer %llu in %s", ++ reiserfs_warning(s, "clm-2082", ++ "Unable to flush buffer %llu in %s", + (unsigned long long)saved_bh-> + b_blocknr, __func__); + } +@@ -1541,8 +1541,8 @@ static int flush_journal_list(struct sup + /* we incremented this to keep others from taking the buffer head away */ + put_bh(saved_bh); + if (atomic_read(&(saved_bh->b_count)) < 0) { +- reiserfs_warning(s, +- "journal-945: saved_bh->b_count < 0"); ++ reiserfs_warning(s, "journal-945", ++ "saved_bh->b_count < 0"); + } + } + } +@@ -1561,8 +1561,8 @@ static int flush_journal_list(struct sup + } + if (unlikely(!buffer_uptodate(cn->bh))) { + #ifdef CONFIG_REISERFS_CHECK +- reiserfs_warning(s, +- "journal-949: buffer write failed\n"); ++ reiserfs_warning(s, "journal-949", ++ "buffer write failed"); + #endif + err = -EIO; + } +@@ -1623,7 +1623,7 @@ static int flush_journal_list(struct sup + + if (journal->j_last_flush_id != 0 && + (jl->j_trans_id - journal->j_last_flush_id) != 1) { +- reiserfs_warning(s, "clm-2201: last flush %lu, current %lu", ++ reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu", + journal->j_last_flush_id, jl->j_trans_id); + } + journal->j_last_flush_id = jl->j_trans_id; +@@ -2058,8 +2058,9 @@ static int journal_transaction_is_valid( + return -1; + } + if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) { +- reiserfs_warning(p_s_sb, +- "journal-2018: Bad transaction length %d encountered, ignoring transaction", ++ reiserfs_warning(p_s_sb, "journal-2018", ++ "Bad transaction length %d " ++ "encountered, ignoring transaction", + get_desc_trans_len(desc)); + return -1; + } +@@ -2195,8 +2196,8 @@ static int journal_read_transaction(stru + brelse(d_bh); + kfree(log_blocks); + kfree(real_blocks); +- reiserfs_warning(p_s_sb, +- "journal-1169: kmalloc failed, unable to mount FS"); ++ 
reiserfs_warning(p_s_sb, "journal-1169", ++ "kmalloc failed, unable to mount FS"); + return -1; + } + /* get all the buffer heads */ +@@ -2218,15 +2219,18 @@ static int journal_read_transaction(stru + j_realblock[i - trans_half])); + } + if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { +- reiserfs_warning(p_s_sb, +- "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem"); ++ reiserfs_warning(p_s_sb, "journal-1207", ++ "REPLAY FAILURE fsck required! " ++ "Block to replay is outside of " ++ "filesystem"); + goto abort_replay; + } + /* make sure we don't try to replay onto log or reserved area */ + if (is_block_in_log_or_reserved_area + (p_s_sb, real_blocks[i]->b_blocknr)) { +- reiserfs_warning(p_s_sb, +- "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block"); ++ reiserfs_warning(p_s_sb, "journal-1204", ++ "REPLAY FAILURE fsck required! " ++ "Trying to replay onto a log block"); + abort_replay: + brelse_array(log_blocks, i); + brelse_array(real_blocks, i); +@@ -2242,8 +2246,9 @@ static int journal_read_transaction(stru + for (i = 0; i < get_desc_trans_len(desc); i++) { + wait_on_buffer(log_blocks[i]); + if (!buffer_uptodate(log_blocks[i])) { +- reiserfs_warning(p_s_sb, +- "journal-1212: REPLAY FAILURE fsck required! buffer write failed"); ++ reiserfs_warning(p_s_sb, "journal-1212", ++ "REPLAY FAILURE fsck required! " ++ "buffer write failed"); + brelse_array(log_blocks + i, + get_desc_trans_len(desc) - i); + brelse_array(real_blocks, get_desc_trans_len(desc)); +@@ -2266,8 +2271,9 @@ static int journal_read_transaction(stru + for (i = 0; i < get_desc_trans_len(desc); i++) { + wait_on_buffer(real_blocks[i]); + if (!buffer_uptodate(real_blocks[i])) { +- reiserfs_warning(p_s_sb, +- "journal-1226: REPLAY FAILURE, fsck required! buffer write failed"); ++ reiserfs_warning(p_s_sb, "journal-1226", ++ "REPLAY FAILURE, fsck required! 
" ++ "buffer write failed"); + brelse_array(real_blocks + i, + get_desc_trans_len(desc) - i); + brelse(c_bh); +@@ -2418,8 +2424,8 @@ static int journal_read(struct super_blo + } + + if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) { +- reiserfs_warning(p_s_sb, +- "clm-2076: device is readonly, unable to replay log"); ++ reiserfs_warning(p_s_sb, "clm-2076", ++ "device is readonly, unable to replay log"); + return -1; + } + +@@ -2580,9 +2586,8 @@ static int release_journal_dev(struct su + } + + if (result != 0) { +- reiserfs_warning(super, +- "sh-457: release_journal_dev: Cannot release journal device: %i", +- result); ++ reiserfs_warning(super, "sh-457", ++ "Cannot release journal device: %i", result); + } + return result; + } +@@ -2611,7 +2616,7 @@ static int journal_init_dev(struct super + if (IS_ERR(journal->j_dev_bd)) { + result = PTR_ERR(journal->j_dev_bd); + journal->j_dev_bd = NULL; +- reiserfs_warning(super, "sh-458: journal_init_dev: " ++ reiserfs_warning(super, "sh-458", + "cannot init journal device '%s': %i", + __bdevname(jdev, b), result); + return result; +@@ -2673,16 +2678,16 @@ static int check_advise_trans_params(str + journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio || + SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max < + JOURNAL_MIN_RATIO) { +- reiserfs_warning(p_s_sb, +- "sh-462: bad transaction max size (%u). FSCK?", +- journal->j_trans_max); ++ reiserfs_warning(p_s_sb, "sh-462", ++ "bad transaction max size (%u). " ++ "FSCK?", journal->j_trans_max); + return 1; + } + if (journal->j_max_batch != (journal->j_trans_max) * + JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) { +- reiserfs_warning(p_s_sb, +- "sh-463: bad transaction max batch (%u). FSCK?", +- journal->j_max_batch); ++ reiserfs_warning(p_s_sb, "sh-463", ++ "bad transaction max batch (%u). 
" ++ "FSCK?", journal->j_max_batch); + return 1; + } + } else { +@@ -2690,9 +2695,11 @@ static int check_advise_trans_params(str + The file system was created by old version + of mkreiserfs, so some fields contain zeros, + and we need to advise proper values for them */ +- if (p_s_sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) +- reiserfs_panic(p_s_sb, "sh-464: bad blocksize (%u)", +- p_s_sb->s_blocksize); ++ if (p_s_sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { ++ reiserfs_warning(p_s_sb, "sh-464", "bad blocksize (%u)", ++ p_s_sb->s_blocksize); ++ return 1; ++ } + journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; + journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; + journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; +@@ -2716,8 +2723,8 @@ int journal_init(struct super_block *p_s + + journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal)); + if (!journal) { +- reiserfs_warning(p_s_sb, +- "journal-1256: unable to get memory for journal structure"); ++ reiserfs_warning(p_s_sb, "journal-1256", ++ "unable to get memory for journal structure"); + return 1; + } + memset(journal, 0, sizeof(struct reiserfs_journal)); +@@ -2746,9 +2753,9 @@ int journal_init(struct super_block *p_s + if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) && + (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + + SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) { +- reiserfs_warning(p_s_sb, +- "journal-1393: journal does not fit for area " +- "addressed by first of bitmap blocks. It starts at " ++ reiserfs_warning(p_s_sb, "journal-1393", ++ "journal does not fit for area addressed " ++ "by first of bitmap blocks. It starts at " + "%u and its size is %u. 
Block size %ld", + SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb), + SB_ONDISK_JOURNAL_SIZE(p_s_sb), +@@ -2757,8 +2764,8 @@ int journal_init(struct super_block *p_s + } + + if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) { +- reiserfs_warning(p_s_sb, +- "sh-462: unable to initialize jornal device"); ++ reiserfs_warning(p_s_sb, "sh-462", ++ "unable to initialize jornal device"); + goto free_and_return; + } + +@@ -2769,8 +2776,8 @@ int journal_init(struct super_block *p_s + SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); + if (!bhjh) { +- reiserfs_warning(p_s_sb, +- "sh-459: unable to read journal header"); ++ reiserfs_warning(p_s_sb, "sh-459", ++ "unable to read journal header"); + goto free_and_return; + } + jh = (struct reiserfs_journal_header *)(bhjh->b_data); +@@ -2779,10 +2786,10 @@ int journal_init(struct super_block *p_s + if (is_reiserfs_jr(rs) + && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != + sb_jp_journal_magic(rs))) { +- reiserfs_warning(p_s_sb, +- "sh-460: journal header magic %x " +- "(device %s) does not match to magic found in super " +- "block %x", jh->jh_journal.jp_journal_magic, ++ reiserfs_warning(p_s_sb, "sh-460", ++ "journal header magic %x (device %s) does " ++ "not match to magic found in super block %x", ++ jh->jh_journal.jp_journal_magic, + bdevname(journal->j_dev_bd, b), + sb_jp_journal_magic(rs)); + brelse(bhjh); +@@ -2849,7 +2856,7 @@ int journal_init(struct super_block *p_s + journal->j_must_wait = 0; + + if (journal->j_cnode_free == 0) { +- reiserfs_warning(p_s_sb, "journal-2004: Journal cnode memory " ++ reiserfs_warning(p_s_sb, "journal-2004", "Journal cnode memory " + "allocation failed (%ld bytes). Journal is " + "too large for available memory. 
Usually " + "this is due to a journal that is too large.", +@@ -2861,12 +2868,13 @@ int journal_init(struct super_block *p_s + jl = journal->j_current_jl; + jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl); + if (!jl->j_list_bitmap) { +- reiserfs_warning(p_s_sb, +- "journal-2005, get_list_bitmap failed for journal list 0"); ++ reiserfs_warning(p_s_sb, "journal-2005", ++ "get_list_bitmap failed for journal list 0"); + goto free_and_return; + } + if (journal_read(p_s_sb) < 0) { +- reiserfs_warning(p_s_sb, "Replay Failure, unable to mount"); ++ reiserfs_warning(p_s_sb, "reiserfs-2006", ++ "Replay Failure, unable to mount"); + goto free_and_return; + } + +@@ -3193,16 +3201,17 @@ int journal_begin(struct reiserfs_transa + cur_th->t_refcount++; + memcpy(th, cur_th, sizeof(*th)); + if (th->t_refcount <= 1) +- reiserfs_warning(p_s_sb, +- "BAD: refcount <= 1, but journal_info != 0"); ++ reiserfs_warning(p_s_sb, "reiserfs-2005", ++ "BAD: refcount <= 1, but " ++ "journal_info != 0"); + return 0; + } else { + /* we've ended up with a handle from a different filesystem. + ** save it and restore on journal_end. This should never + ** really happen... + */ +- reiserfs_warning(p_s_sb, +- "clm-2100: nesting info a different FS"); ++ reiserfs_warning(p_s_sb, "clm-2100", ++ "nesting info a different FS"); + th->t_handle_save = current->journal_info; + current->journal_info = th; + } +@@ -3263,7 +3272,8 @@ int journal_mark_dirty(struct reiserfs_t + ** could get to disk too early. NOT GOOD. + */ + if (!prepared || buffer_dirty(bh)) { +- reiserfs_warning(p_s_sb, "journal-1777: buffer %llu bad state " ++ reiserfs_warning(p_s_sb, "journal-1777", ++ "buffer %llu bad state " + "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", + (unsigned long long)bh->b_blocknr, + prepared ? 
' ' : '!', +@@ -3273,8 +3283,8 @@ int journal_mark_dirty(struct reiserfs_t + } + + if (atomic_read(&(journal->j_wcount)) <= 0) { +- reiserfs_warning(p_s_sb, +- "journal-1409: journal_mark_dirty returning because j_wcount was %d", ++ reiserfs_warning(p_s_sb, "journal-1409", ++ "returning because j_wcount was %d", + atomic_read(&(journal->j_wcount))); + return 1; + } +@@ -3339,8 +3349,8 @@ int journal_end(struct reiserfs_transact + struct super_block *p_s_sb, unsigned long nblocks) + { + if (!current->journal_info && th->t_refcount > 1) +- reiserfs_warning(p_s_sb, "REISER-NESTING: th NULL, refcount %d", +- th->t_refcount); ++ reiserfs_warning(p_s_sb, "REISER-NESTING", ++ "th NULL, refcount %d", th->t_refcount); + + if (!th->t_trans_id) { + WARN_ON(1); +@@ -3410,8 +3420,8 @@ static int remove_from_transaction(struc + clear_buffer_journal_test(bh); + put_bh(bh); + if (atomic_read(&(bh->b_count)) < 0) { +- reiserfs_warning(p_s_sb, +- "journal-1752: remove from trans, b_count < 0"); ++ reiserfs_warning(p_s_sb, "journal-1752", ++ "b_count < 0"); + } + ret = 1; + } +@@ -3731,7 +3741,8 @@ int journal_mark_freed(struct reiserfs_t + if (atomic_read + (&(cn->bh->b_count)) < 0) { + reiserfs_warning(p_s_sb, +- "journal-2138: cn->bh->b_count < 0"); ++ "journal-2138", ++ "cn->bh->b_count < 0"); + } + } + if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ +@@ -4134,8 +4145,9 @@ static int do_journal_end(struct reiserf + clear_buffer_journaled(cn->bh); + } else { + /* JDirty cleared sometime during transaction. don't log this one */ +- reiserfs_warning(p_s_sb, +- "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!"); ++ reiserfs_warning(p_s_sb, "journal-2048", ++ "BAD, buffer in journal hash, " ++ "but not JDirty!"); + brelse(cn->bh); + } + next = cn->next; +--- a/fs/reiserfs/lbalance.c ++++ b/fs/reiserfs/lbalance.c +@@ -1288,12 +1288,16 @@ void leaf_paste_entries(struct buffer_in + prev = (i != 0) ? 
deh_location(&(deh[i - 1])) : 0; + + if (prev && prev <= deh_location(&(deh[i]))) +- reiserfs_warning(NULL, +- "vs-10240: leaf_paste_entries: directory item (%h) corrupted (prev %a, cur(%d) %a)", ++ reiserfs_warning(NULL, "vs-10240", ++ "directory item (%h) " ++ "corrupted (prev %a, " ++ "cur(%d) %a)", + ih, deh + i - 1, i, deh + i); + if (next && next >= deh_location(&(deh[i]))) +- reiserfs_warning(NULL, +- "vs-10250: leaf_paste_entries: directory item (%h) corrupted (cur(%d) %a, next %a)", ++ reiserfs_warning(NULL, "vs-10250", ++ "directory item (%h) " ++ "corrupted (cur(%d) %a, " ++ "next %a)", + ih, i, deh + i, deh + i + 1); + } + } +--- a/fs/reiserfs/namei.c ++++ b/fs/reiserfs/namei.c +@@ -120,8 +120,8 @@ int search_by_entry_key(struct super_blo + switch (retval) { + case ITEM_NOT_FOUND: + if (!PATH_LAST_POSITION(path)) { +- reiserfs_warning(sb, +- "vs-7000: search_by_entry_key: search_by_key returned item position == 0"); ++ reiserfs_warning(sb, "vs-7000", "search_by_key " ++ "returned item position == 0"); + pathrelse(path); + return IO_ERROR; + } +@@ -135,8 +135,7 @@ int search_by_entry_key(struct super_blo + + default: + pathrelse(path); +- reiserfs_warning(sb, +- "vs-7002: search_by_entry_key: no path to here"); ++ reiserfs_warning(sb, "vs-7002", "no path to here"); + return IO_ERROR; + } + +@@ -300,8 +299,7 @@ static int reiserfs_find_entry(struct in + search_by_entry_key(dir->i_sb, &key_to_search, + path_to_entry, de); + if (retval == IO_ERROR) { +- reiserfs_warning(dir->i_sb, "zam-7001: io error in %s", +- __func__); ++ reiserfs_warning(dir->i_sb, "zam-7001", "io error"); + return IO_ERROR; + } + +@@ -493,10 +491,9 @@ static int reiserfs_add_entry(struct rei + } + + if (retval != NAME_FOUND) { +- reiserfs_warning(dir->i_sb, +- "zam-7002:%s: \"reiserfs_find_entry\" " +- "has returned unexpected value (%d)", +- __func__, retval); ++ reiserfs_warning(dir->i_sb, "zam-7002", ++ "reiserfs_find_entry() returned " ++ "unexpected value (%d)", retval); + } + + 
return -EEXIST; +@@ -507,8 +504,9 @@ static int reiserfs_add_entry(struct rei + MAX_GENERATION_NUMBER + 1); + if (gen_number > MAX_GENERATION_NUMBER) { + /* there is no free generation number */ +- reiserfs_warning(dir->i_sb, +- "reiserfs_add_entry: Congratulations! we have got hash function screwed up"); ++ reiserfs_warning(dir->i_sb, "reiserfs-7010", ++ "Congratulations! we have got hash function " ++ "screwed up"); + if (buffer != small_buf) + kfree(buffer); + pathrelse(&path); +@@ -524,10 +522,9 @@ static int reiserfs_add_entry(struct rei + if (gen_number != 0) { /* we need to re-search for the insertion point */ + if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) != + NAME_NOT_FOUND) { +- reiserfs_warning(dir->i_sb, +- "vs-7032: reiserfs_add_entry: " +- "entry with this key (%K) already exists", +- &entry_key); ++ reiserfs_warning(dir->i_sb, "vs-7032", ++ "entry with this key (%K) already " ++ "exists", &entry_key); + + if (buffer != small_buf) + kfree(buffer); +@@ -906,8 +903,9 @@ static int reiserfs_rmdir(struct inode * + goto end_rmdir; + + if (inode->i_nlink != 2 && inode->i_nlink != 1) +- reiserfs_warning(inode->i_sb, "%s: empty directory has nlink " +- "!= 2 (%d)", __func__, inode->i_nlink); ++ reiserfs_warning(inode->i_sb, "reiserfs-7040", ++ "empty directory has nlink != 2 (%d)", ++ inode->i_nlink); + + clear_nlink(inode); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; +@@ -983,10 +981,9 @@ static int reiserfs_unlink(struct inode + } + + if (!inode->i_nlink) { +- reiserfs_warning(inode->i_sb, "%s: deleting nonexistent file " +- "(%s:%lu), %d", __func__, +- reiserfs_bdevname(inode->i_sb), inode->i_ino, +- inode->i_nlink); ++ reiserfs_warning(inode->i_sb, "reiserfs-7042", ++ "deleting nonexistent file (%lu), %d", ++ inode->i_ino, inode->i_nlink); + inode->i_nlink = 1; + } + +@@ -1500,8 +1497,8 @@ static int reiserfs_rename(struct inode + if (reiserfs_cut_from_item + (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, 
NULL, + 0) < 0) +- reiserfs_warning(old_dir->i_sb, +- "vs-7060: reiserfs_rename: couldn't not cut old name. Fsck later?"); ++ reiserfs_warning(old_dir->i_sb, "vs-7060", ++ "couldn't not cut old name. Fsck later?"); + + old_dir->i_size -= DEH_SIZE + old_de.de_entrylen; + +--- a/fs/reiserfs/objectid.c ++++ b/fs/reiserfs/objectid.c +@@ -61,7 +61,7 @@ __u32 reiserfs_get_unused_objectid(struc + /* comment needed -Hans */ + unused_objectid = le32_to_cpu(map[1]); + if (unused_objectid == U32_MAX) { +- reiserfs_warning(s, "%s: no more object ids", __func__); ++ reiserfs_warning(s, "reiserfs-15100", "no more object ids"); + reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)); + return 0; + } +@@ -160,8 +160,7 @@ void reiserfs_release_objectid(struct re + i += 2; + } + +- reiserfs_warning(s, +- "vs-15011: reiserfs_release_objectid: tried to free free object id (%lu)", ++ reiserfs_warning(s, "vs-15011", "tried to free free object id (%lu)", + (long unsigned)objectid_to_release); + } + +--- a/fs/reiserfs/prints.c ++++ b/fs/reiserfs/prints.c +@@ -264,14 +264,17 @@ static void prepare_error_buf(const char + va_end( args );\ + } + +-void reiserfs_warning(struct super_block *sb, const char *fmt, ...) ++void __reiserfs_warning(struct super_block *sb, const char *id, ++ const char *function, const char *fmt, ...) + { + do_reiserfs_warning(fmt); + if (sb) +- printk(KERN_WARNING "REISERFS warning (device %s): %s\n", +- sb->s_id, error_buf); ++ printk(KERN_WARNING "REISERFS warning (device %s): %s%s%s: " ++ "%s\n", sb->s_id, id ? id : "", id ? " " : "", ++ function, error_buf); + else +- printk(KERN_WARNING "REISERFS warning: %s\n", error_buf); ++ printk(KERN_WARNING "REISERFS warning: %s%s%s: %s\n", ++ id ? id : "", id ? " " : "", function, error_buf); + } + + /* No newline.. 
reiserfs_info calls can be followed by printk's */ +--- a/fs/reiserfs/procfs.c ++++ b/fs/reiserfs/procfs.c +@@ -505,7 +505,7 @@ int reiserfs_proc_info_init(struct super + add_file(sb, "journal", show_journal); + return 0; + } +- reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s", ++ reiserfs_warning(sb, "cannot create /proc/%s/%s", + proc_info_root_name, b); + return 1; + } +@@ -561,8 +561,7 @@ int reiserfs_proc_info_global_init(void) + if (proc_info_root) { + proc_info_root->owner = THIS_MODULE; + } else { +- reiserfs_warning(NULL, +- "reiserfs: cannot create /proc/%s", ++ reiserfs_warning(NULL, "cannot create /proc/%s", + proc_info_root_name); + return 1; + } +--- a/fs/reiserfs/stree.c ++++ b/fs/reiserfs/stree.c +@@ -444,23 +444,24 @@ static int is_leaf(char *buf, int blocks + + blkh = (struct block_head *)buf; + if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) { +- reiserfs_warning(NULL, +- "is_leaf: this should be caught earlier"); ++ reiserfs_warning(NULL, "reiserfs-5080", ++ "this should be caught earlier"); + return 0; + } + + nr = blkh_nr_item(blkh); + if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { + /* item number is too big or too small */ +- reiserfs_warning(NULL, "is_leaf: nr_item seems wrong: %z", bh); ++ reiserfs_warning(NULL, "reiserfs-5081", ++ "nr_item seems wrong: %z", bh); + return 0; + } + ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; + used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); + if (used_space != blocksize - blkh_free_space(blkh)) { + /* free space does not match to calculated amount of use space */ +- reiserfs_warning(NULL, "is_leaf: free space seems wrong: %z", +- bh); ++ reiserfs_warning(NULL, "reiserfs-5082", ++ "free space seems wrong: %z", bh); + return 0; + } + // FIXME: it is_leaf will hit performance too much - we may have +@@ -471,29 +472,29 @@ static int is_leaf(char *buf, int blocks + prev_location = blocksize; + for (i = 0; i < nr; i++, ih++) { + if 
(le_ih_k_type(ih) == TYPE_ANY) { +- reiserfs_warning(NULL, +- "is_leaf: wrong item type for item %h", ++ reiserfs_warning(NULL, "reiserfs-5083", ++ "wrong item type for item %h", + ih); + return 0; + } + if (ih_location(ih) >= blocksize + || ih_location(ih) < IH_SIZE * nr) { +- reiserfs_warning(NULL, +- "is_leaf: item location seems wrong: %h", ++ reiserfs_warning(NULL, "reiserfs-5084", ++ "item location seems wrong: %h", + ih); + return 0; + } + if (ih_item_len(ih) < 1 + || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) { +- reiserfs_warning(NULL, +- "is_leaf: item length seems wrong: %h", ++ reiserfs_warning(NULL, "reiserfs-5085", ++ "item length seems wrong: %h", + ih); + return 0; + } + if (prev_location - ih_location(ih) != ih_item_len(ih)) { +- reiserfs_warning(NULL, +- "is_leaf: item location seems wrong (second one): %h", +- ih); ++ reiserfs_warning(NULL, "reiserfs-5086", ++ "item location seems wrong " ++ "(second one): %h", ih); + return 0; + } + prev_location = ih_location(ih); +@@ -514,24 +515,23 @@ static int is_internal(char *buf, int bl + nr = blkh_level(blkh); + if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) { + /* this level is not possible for internal nodes */ +- reiserfs_warning(NULL, +- "is_internal: this should be caught earlier"); ++ reiserfs_warning(NULL, "reiserfs-5087", ++ "this should be caught earlier"); + return 0; + } + + nr = blkh_nr_item(blkh); + if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { + /* for internal which is not root we might check min number of keys */ +- reiserfs_warning(NULL, +- "is_internal: number of key seems wrong: %z", +- bh); ++ reiserfs_warning(NULL, "reiserfs-5088", ++ "number of key seems wrong: %z", bh); + return 0; + } + + used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); + if (used_space != blocksize - blkh_free_space(blkh)) { +- reiserfs_warning(NULL, +- "is_internal: free space seems wrong: %z", bh); ++ reiserfs_warning(NULL, "reiserfs-5089", ++ "free space seems wrong: 
%z", bh); + return 0; + } + // one may imagine much more checks +@@ -543,8 +543,8 @@ static int is_internal(char *buf, int bl + static int is_tree_node(struct buffer_head *bh, int level) + { + if (B_LEVEL(bh) != level) { +- reiserfs_warning(NULL, +- "is_tree_node: node level %d does not match to the expected one %d", ++ reiserfs_warning(NULL, "reiserfs-5090", "node level %d does " ++ "not match to the expected one %d", + B_LEVEL(bh), level); + return 0; + } +@@ -645,9 +645,9 @@ int search_by_key(struct super_block *p_ + + #ifdef CONFIG_REISERFS_CHECK + if (!(++n_repeat_counter % 50000)) +- reiserfs_warning(p_s_sb, "PAP-5100: search_by_key: %s:" +- "there were %d iterations of while loop " +- "looking for key %K", ++ reiserfs_warning(p_s_sb, "PAP-5100", ++ "%s: there were %d iterations of " ++ "while loop looking for key %K", + current->comm, n_repeat_counter, + p_s_key); + #endif +@@ -721,9 +721,9 @@ int search_by_key(struct super_block *p_ + // make sure, that the node contents look like a node of + // certain level + if (!is_tree_node(p_s_bh, expected_level)) { +- reiserfs_warning(p_s_sb, "vs-5150: search_by_key: " +- "invalid format found in block %ld. Fsck?", +- p_s_bh->b_blocknr); ++ reiserfs_warning(p_s_sb, "vs-5150", ++ "invalid format found in block %ld. 
" ++ "Fsck?", p_s_bh->b_blocknr); + pathrelse(p_s_search_path); + return IO_ERROR; + } +@@ -1227,8 +1227,7 @@ int reiserfs_delete_item(struct reiserfs + if (n_ret_value == IO_ERROR) + break; + if (n_ret_value == FILE_NOT_FOUND) { +- reiserfs_warning(p_s_sb, +- "vs-5340: reiserfs_delete_item: " ++ reiserfs_warning(p_s_sb, "vs-5340", + "no items of the file %K found", + p_s_item_key); + break; +@@ -1338,10 +1337,9 @@ void reiserfs_delete_solid_item(struct r + while (1) { + retval = search_item(th->t_super, &cpu_key, &path); + if (retval == IO_ERROR) { +- reiserfs_warning(th->t_super, +- "vs-5350: reiserfs_delete_solid_item: " +- "i/o failure occurred trying to delete %K", +- &cpu_key); ++ reiserfs_warning(th->t_super, "vs-5350", ++ "i/o failure occurred trying " ++ "to delete %K", &cpu_key); + break; + } + if (retval != ITEM_FOUND) { +@@ -1355,9 +1353,8 @@ void reiserfs_delete_solid_item(struct r + GET_GENERATION_NUMBER(le_key_k_offset + (le_key_version(key), + key)) == 1)) +- reiserfs_warning(th->t_super, +- "vs-5355: reiserfs_delete_solid_item: %k not found", +- key); ++ reiserfs_warning(th->t_super, "vs-5355", ++ "%k not found", key); + break; + } + if (!tb_init) { +@@ -1389,8 +1386,7 @@ void reiserfs_delete_solid_item(struct r + break; + } + // IO_ERROR, NO_DISK_SPACE, etc +- reiserfs_warning(th->t_super, +- "vs-5360: reiserfs_delete_solid_item: " ++ reiserfs_warning(th->t_super, "vs-5360", + "could not delete %K due to fix_nodes failure", + &cpu_key); + unfix_nodes(&tb); +@@ -1533,8 +1529,9 @@ static void indirect_to_direct_roll_back + set_cpu_key_k_offset(&tail_key, + cpu_key_k_offset(&tail_key) - removed); + } +- reiserfs_warning(inode->i_sb, +- "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space"); ++ reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct " ++ "conversion has been rolled back due to " ++ "lack of disk space"); + //mark_file_without_tail (inode); + mark_inode_dirty(inode); + } 
+@@ -1639,8 +1636,7 @@ int reiserfs_cut_from_item(struct reiser + if (n_ret_value == POSITION_FOUND) + continue; + +- reiserfs_warning(p_s_sb, +- "PAP-5610: reiserfs_cut_from_item: item %K not found", ++ reiserfs_warning(p_s_sb, "PAP-5610", "item %K not found", + p_s_item_key); + unfix_nodes(&s_cut_balance); + return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT; +@@ -1654,7 +1650,8 @@ int reiserfs_cut_from_item(struct reiser + indirect_to_direct_roll_back(th, p_s_inode, p_s_path); + } + if (n_ret_value == NO_DISK_SPACE) +- reiserfs_warning(p_s_sb, "NO_DISK_SPACE"); ++ reiserfs_warning(p_s_sb, "reiserfs-5092", ++ "NO_DISK_SPACE"); + unfix_nodes(&s_cut_balance); + return -EIO; + } +@@ -1743,8 +1740,7 @@ static void truncate_directory(struct re + { + BUG_ON(!th->t_trans_id); + if (inode->i_nlink) +- reiserfs_warning(inode->i_sb, +- "vs-5655: truncate_directory: link count != 0"); ++ reiserfs_warning(inode->i_sb, "vs-5655", "link count != 0"); + + set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET); + set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY); +@@ -1797,16 +1793,14 @@ int reiserfs_do_truncate(struct reiserfs + search_for_position_by_key(p_s_inode->i_sb, &s_item_key, + &s_search_path); + if (retval == IO_ERROR) { +- reiserfs_warning(p_s_inode->i_sb, +- "vs-5657: reiserfs_do_truncate: " ++ reiserfs_warning(p_s_inode->i_sb, "vs-5657", + "i/o failure occurred trying to truncate %K", + &s_item_key); + err = -EIO; + goto out; + } + if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { +- reiserfs_warning(p_s_inode->i_sb, +- "PAP-5660: reiserfs_do_truncate: " ++ reiserfs_warning(p_s_inode->i_sb, "PAP-5660", + "wrong result %d of search for %K", retval, + &s_item_key); + +@@ -1850,8 +1844,8 @@ int reiserfs_do_truncate(struct reiserfs + reiserfs_cut_from_item(th, &s_search_path, &s_item_key, + p_s_inode, page, n_new_file_size); + if (n_deleted < 0) { +- reiserfs_warning(p_s_inode->i_sb, +- "vs-5665: reiserfs_do_truncate: 
reiserfs_cut_from_item failed"); ++ reiserfs_warning(p_s_inode->i_sb, "vs-5665", ++ "reiserfs_cut_from_item failed"); + reiserfs_check_path(&s_search_path); + return 0; + } +@@ -2000,8 +1994,8 @@ int reiserfs_paste_into_item(struct reis + goto error_out; + } + if (retval == POSITION_FOUND) { +- reiserfs_warning(inode->i_sb, +- "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", ++ reiserfs_warning(inode->i_sb, "PAP-5710", ++ "entry or pasted byte (%K) exists", + p_s_key); + retval = -EEXIST; + goto error_out; +@@ -2087,8 +2081,7 @@ int reiserfs_insert_item(struct reiserfs + goto error_out; + } + if (retval == ITEM_FOUND) { +- reiserfs_warning(th->t_super, +- "PAP-5760: reiserfs_insert_item: " ++ reiserfs_warning(th->t_super, "PAP-5760", + "key %K already exists in the tree", + key); + retval = -EEXIST; +--- a/fs/reiserfs/super.c ++++ b/fs/reiserfs/super.c +@@ -181,9 +181,9 @@ static int finish_unfinished(struct supe + if (REISERFS_SB(s)->s_qf_names[i]) { + int ret = reiserfs_quota_on_mount(s, i); + if (ret < 0) +- reiserfs_warning(s, +- "reiserfs: cannot turn on journaled quota: error %d", +- ret); ++ reiserfs_warning(s, "reiserfs-2500", ++ "cannot turn on journaled " ++ "quota: error %d", ret); + } + } + #endif +@@ -193,8 +193,8 @@ static int finish_unfinished(struct supe + while (!retval) { + retval = search_item(s, &max_cpu_key, &path); + if (retval != ITEM_NOT_FOUND) { +- reiserfs_warning(s, +- "vs-2140: finish_unfinished: search_by_key returned %d", ++ reiserfs_warning(s, "vs-2140", ++ "search_by_key returned %d", + retval); + break; + } +@@ -202,8 +202,8 @@ static int finish_unfinished(struct supe + bh = get_last_bh(&path); + item_pos = get_item_pos(&path); + if (item_pos != B_NR_ITEMS(bh)) { +- reiserfs_warning(s, +- "vs-2060: finish_unfinished: wrong position found"); ++ reiserfs_warning(s, "vs-2060", ++ "wrong position found"); + break; + } + item_pos--; +@@ -233,8 +233,7 @@ static int finish_unfinished(struct supe + if (!inode) { + /* 
the unlink almost completed, it just did not manage to remove + "save" link and release objectid */ +- reiserfs_warning(s, +- "vs-2180: finish_unfinished: iget failed for %K", ++ reiserfs_warning(s, "vs-2180", "iget failed for %K", + &obj_key); + retval = remove_save_link_only(s, &save_link_key, 1); + continue; +@@ -242,8 +241,8 @@ static int finish_unfinished(struct supe + + if (!truncate && inode->i_nlink) { + /* file is not unlinked */ +- reiserfs_warning(s, +- "vs-2185: finish_unfinished: file %K is not unlinked", ++ reiserfs_warning(s, "vs-2185", ++ "file %K is not unlinked", + &obj_key); + retval = remove_save_link_only(s, &save_link_key, 0); + continue; +@@ -255,8 +254,9 @@ static int finish_unfinished(struct supe + The only imaginable way is to execute unfinished truncate request + then boot into old kernel, remove the file and create dir with + the same key. */ +- reiserfs_warning(s, +- "green-2101: impossible truncate on a directory %k. Please report", ++ reiserfs_warning(s, "green-2101", ++ "impossible truncate on a " ++ "directory %k. Please report", + INODE_PKEY(inode)); + retval = remove_save_link_only(s, &save_link_key, 0); + truncate = 0; +@@ -286,9 +286,10 @@ static int finish_unfinished(struct supe + /* removal gets completed in iput */ + retval = 0; + } else { +- reiserfs_warning(s, "Dead loop in " +- "finish_unfinished detected, " +- "just remove save link\n"); ++ reiserfs_warning(s, "super-2189", "Dead loop " ++ "in finish_unfinished " ++ "detected, just remove " ++ "save link\n"); + retval = remove_save_link_only(s, + &save_link_key, 0); + } +@@ -358,8 +359,9 @@ void add_save_link(struct reiserfs_trans + } else { + /* truncate */ + if (S_ISDIR(inode->i_mode)) +- reiserfs_warning(inode->i_sb, +- "green-2102: Adding a truncate savelink for a directory %k! Please report", ++ reiserfs_warning(inode->i_sb, "green-2102", ++ "Adding a truncate savelink for " ++ "a directory %k! 
Please report", + INODE_PKEY(inode)); + set_cpu_key_k_offset(&key, 1); + set_cpu_key_k_type(&key, TYPE_INDIRECT); +@@ -374,7 +376,7 @@ void add_save_link(struct reiserfs_trans + retval = search_item(inode->i_sb, &key, &path); + if (retval != ITEM_NOT_FOUND) { + if (retval != -ENOSPC) +- reiserfs_warning(inode->i_sb, "vs-2100: add_save_link:" ++ reiserfs_warning(inode->i_sb, "vs-2100", + "search_by_key (%K) returned %d", &key, + retval); + pathrelse(&path); +@@ -389,9 +391,8 @@ void add_save_link(struct reiserfs_trans + reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); + if (retval) { + if (retval != -ENOSPC) +- reiserfs_warning(inode->i_sb, +- "vs-2120: add_save_link: insert_item returned %d", +- retval); ++ reiserfs_warning(inode->i_sb, "vs-2120", ++ "insert_item returned %d", retval); + } else { + if (truncate) + REISERFS_I(inode)->i_flags |= +@@ -490,8 +491,7 @@ static void reiserfs_put_super(struct su + print_statistics(s); + + if (REISERFS_SB(s)->reserved_blocks != 0) { +- reiserfs_warning(s, +- "green-2005: reiserfs_put_super: reserved blocks left %d", ++ reiserfs_warning(s, "green-2005", "reserved blocks left %d", + REISERFS_SB(s)->reserved_blocks); + } + +@@ -557,8 +557,8 @@ static void reiserfs_dirty_inode(struct + + int err = 0; + if (inode->i_sb->s_flags & MS_RDONLY) { +- reiserfs_warning(inode->i_sb, +- "clm-6006: writing inode %lu on readonly FS", ++ reiserfs_warning(inode->i_sb, "clm-6006", ++ "writing inode %lu on readonly FS", + inode->i_ino); + return; + } +@@ -790,13 +790,15 @@ static int reiserfs_getopt(struct super_ + if (bit_flags) { + if (opt->clrmask == + (1 << REISERFS_UNSUPPORTED_OPT)) +- reiserfs_warning(s, "%s not supported.", ++ reiserfs_warning(s, "super-6500", ++ "%s not supported.\n", + p); + else + *bit_flags &= ~opt->clrmask; + if (opt->setmask == + (1 << REISERFS_UNSUPPORTED_OPT)) +- reiserfs_warning(s, "%s not supported.", ++ reiserfs_warning(s, "super-6501", ++ "%s not supported.\n", + p); + else + *bit_flags |= 
opt->setmask; +@@ -805,7 +807,8 @@ static int reiserfs_getopt(struct super_ + } + } + if (!opt->option_name) { +- reiserfs_warning(s, "unknown mount option \"%s\"", p); ++ reiserfs_warning(s, "super-6502", ++ "unknown mount option \"%s\"", p); + return -1; + } + +@@ -813,8 +816,9 @@ static int reiserfs_getopt(struct super_ + switch (*p) { + case '=': + if (!opt->arg_required) { +- reiserfs_warning(s, +- "the option \"%s\" does not require an argument", ++ reiserfs_warning(s, "super-6503", ++ "the option \"%s\" does not " ++ "require an argument\n", + opt->option_name); + return -1; + } +@@ -822,14 +826,15 @@ static int reiserfs_getopt(struct super_ + + case 0: + if (opt->arg_required) { +- reiserfs_warning(s, +- "the option \"%s\" requires an argument", +- opt->option_name); ++ reiserfs_warning(s, "super-6504", ++ "the option \"%s\" requires an " ++ "argument\n", opt->option_name); + return -1; + } + break; + default: +- reiserfs_warning(s, "head of option \"%s\" is only correct", ++ reiserfs_warning(s, "super-6505", ++ "head of option \"%s\" is only correct\n", + opt->option_name); + return -1; + } +@@ -841,7 +846,8 @@ static int reiserfs_getopt(struct super_ + && !(opt->arg_required & (1 << REISERFS_OPT_ALLOWEMPTY)) + && !strlen(p)) { + /* this catches "option=," if not allowed */ +- reiserfs_warning(s, "empty argument for \"%s\"", ++ reiserfs_warning(s, "super-6506", ++ "empty argument for \"%s\"\n", + opt->option_name); + return -1; + } +@@ -863,7 +869,8 @@ static int reiserfs_getopt(struct super_ + } + } + +- reiserfs_warning(s, "bad value \"%s\" for option \"%s\"", p, ++ reiserfs_warning(s, "super-6506", ++ "bad value \"%s\" for option \"%s\"\n", p, + opt->option_name); + return -1; + } +@@ -953,9 +960,9 @@ static int reiserfs_parse_options(struct + *blocks = simple_strtoul(arg, &p, 0); + if (*p != '\0') { + /* NNN does not look like a number */ +- reiserfs_warning(s, +- "reiserfs_parse_options: bad value %s", +- arg); ++ reiserfs_warning(s, "super-6507", ++ 
"bad value %s for " ++ "-oresize\n", arg); + return 0; + } + } +@@ -966,8 +973,8 @@ static int reiserfs_parse_options(struct + unsigned long val = simple_strtoul(arg, &p, 0); + /* commit=NNN (time in seconds) */ + if (*p != '\0' || val >= (unsigned int)-1) { +- reiserfs_warning(s, +- "reiserfs_parse_options: bad value %s", ++ reiserfs_warning(s, "super-6508", ++ "bad value %s for -ocommit\n", + arg); + return 0; + } +@@ -975,16 +982,18 @@ static int reiserfs_parse_options(struct + } + + if (c == 'w') { +- reiserfs_warning(s, "reiserfs: nolargeio option is no longer supported"); ++ reiserfs_warning(s, "super-6509", "nolargeio option " ++ "is no longer supported"); + return 0; + } + + if (c == 'j') { + if (arg && *arg && jdev_name) { + if (*jdev_name) { //Hm, already assigned? +- reiserfs_warning(s, +- "reiserfs_parse_options: journal device was already specified to be %s", +- *jdev_name); ++ reiserfs_warning(s, "super-6510", ++ "journal device was " ++ "already specified to " ++ "be %s", *jdev_name); + return 0; + } + *jdev_name = arg; +@@ -997,29 +1006,35 @@ static int reiserfs_parse_options(struct + if ((sb_any_quota_enabled(s) || + sb_any_quota_suspended(s)) && + (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) { +- reiserfs_warning(s, +- "reiserfs_parse_options: cannot change journaled quota options when quota turned on."); ++ reiserfs_warning(s, "super-6511", ++ "cannot change journaled " ++ "quota options when quota " ++ "turned on."); + return 0; + } + if (*arg) { /* Some filename specified? 
*/ + if (REISERFS_SB(s)->s_qf_names[qtype] + && strcmp(REISERFS_SB(s)->s_qf_names[qtype], + arg)) { +- reiserfs_warning(s, +- "reiserfs_parse_options: %s quota file already specified.", ++ reiserfs_warning(s, "super-6512", ++ "%s quota file " ++ "already specified.", + QTYPE2NAME(qtype)); + return 0; + } + if (strchr(arg, '/')) { +- reiserfs_warning(s, +- "reiserfs_parse_options: quotafile must be on filesystem root."); ++ reiserfs_warning(s, "super-6513", ++ "quotafile must be " ++ "on filesystem root."); + return 0; + } + qf_names[qtype] = + kmalloc(strlen(arg) + 1, GFP_KERNEL); + if (!qf_names[qtype]) { +- reiserfs_warning(s, +- "reiserfs_parse_options: not enough memory for storing quotafile name."); ++ reiserfs_warning(s, "reiserfs-2502", ++ "not enough memory " ++ "for storing " ++ "quotafile name."); + return 0; + } + strcpy(qf_names[qtype], arg); +@@ -1037,22 +1052,25 @@ static int reiserfs_parse_options(struct + else if (!strcmp(arg, "vfsv0")) + *qfmt = QFMT_VFS_V0; + else { +- reiserfs_warning(s, +- "reiserfs_parse_options: unknown quota format specified."); ++ reiserfs_warning(s, "super-6514", ++ "unknown quota format " ++ "specified."); + return 0; + } + if ((sb_any_quota_enabled(s) || + sb_any_quota_suspended(s)) && + *qfmt != REISERFS_SB(s)->s_jquota_fmt) { +- reiserfs_warning(s, +- "reiserfs_parse_options: cannot change journaled quota options when quota turned on."); ++ reiserfs_warning(s, "super-6515", ++ "cannot change journaled " ++ "quota options when quota " ++ "turned on."); + return 0; + } + } + #else + if (c == 'u' || c == 'g' || c == 'f') { +- reiserfs_warning(s, +- "reiserfs_parse_options: journaled quota options not supported."); ++ reiserfs_warning(s, "reiserfs-2503", "journaled " ++ "quota options not supported."); + return 0; + } + #endif +@@ -1061,15 +1079,15 @@ static int reiserfs_parse_options(struct + #ifdef CONFIG_QUOTA + if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt + && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) { +- 
reiserfs_warning(s, +- "reiserfs_parse_options: journaled quota format not specified."); ++ reiserfs_warning(s, "super-6515", ++ "journaled quota format not specified."); + return 0; + } + /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ + if (!(*mount_options & (1 << REISERFS_QUOTA)) + && sb_any_quota_enabled(s)) { +- reiserfs_warning(s, +- "reiserfs_parse_options: quota options must be present when quota is turned on."); ++ reiserfs_warning(s, "super-6516", "quota options must " ++ "be present when quota is turned on."); + return 0; + } + #endif +@@ -1129,14 +1147,15 @@ static void handle_attrs(struct super_bl + + if (reiserfs_attrs(s)) { + if (old_format_only(s)) { +- reiserfs_warning(s, +- "reiserfs: cannot support attributes on 3.5.x disk format"); ++ reiserfs_warning(s, "super-6517", "cannot support " ++ "attributes on 3.5.x disk format"); + REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); + return; + } + if (!(le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared)) { +- reiserfs_warning(s, +- "reiserfs: cannot support attributes until flag is set in super-block"); ++ reiserfs_warning(s, "super-6518", "cannot support " ++ "attributes until flag is set in " ++ "super-block"); + REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); + } + } +@@ -1314,7 +1333,7 @@ static int read_super_block(struct super + + bh = sb_bread(s, offset / s->s_blocksize); + if (!bh) { +- reiserfs_warning(s, "sh-2006: read_super_block: " ++ reiserfs_warning(s, "sh-2006", + "bread failed (dev %s, block %lu, size %lu)", + reiserfs_bdevname(s), offset / s->s_blocksize, + s->s_blocksize); +@@ -1335,8 +1354,8 @@ static int read_super_block(struct super + + bh = sb_bread(s, offset / s->s_blocksize); + if (!bh) { +- reiserfs_warning(s, "sh-2007: read_super_block: " +- "bread failed (dev %s, block %lu, size %lu)\n", ++ reiserfs_warning(s, "sh-2007", ++ "bread failed (dev %s, block %lu, size %lu)", + reiserfs_bdevname(s), offset / s->s_blocksize, + 
s->s_blocksize); + return 1; +@@ -1344,8 +1363,8 @@ static int read_super_block(struct super + + rs = (struct reiserfs_super_block *)bh->b_data; + if (sb_blocksize(rs) != s->s_blocksize) { +- reiserfs_warning(s, "sh-2011: read_super_block: " +- "can't find a reiserfs filesystem on (dev %s, block %Lu, size %lu)\n", ++ reiserfs_warning(s, "sh-2011", "can't find a reiserfs " ++ "filesystem on (dev %s, block %Lu, size %lu)", + reiserfs_bdevname(s), + (unsigned long long)bh->b_blocknr, + s->s_blocksize); +@@ -1355,9 +1374,10 @@ static int read_super_block(struct super + + if (rs->s_v1.s_root_block == cpu_to_le32(-1)) { + brelse(bh); +- reiserfs_warning(s, +- "Unfinished reiserfsck --rebuild-tree run detected. Please run\n" +- "reiserfsck --rebuild-tree and wait for a completion. If that fails\n" ++ reiserfs_warning(s, "super-6519", "Unfinished reiserfsck " ++ "--rebuild-tree run detected. Please run\n" ++ "reiserfsck --rebuild-tree and wait for a " ++ "completion. If that fails\n" + "get newer reiserfsprogs package"); + return 1; + } +@@ -1375,10 +1395,9 @@ static int read_super_block(struct super + reiserfs_info(s, "found reiserfs format \"3.5\"" + " with non-standard journal\n"); + else { +- reiserfs_warning(s, +- "sh-2012: read_super_block: found unknown " +- "format \"%u\" of reiserfs with non-standard magic", +- sb_version(rs)); ++ reiserfs_warning(s, "sh-2012", "found unknown " ++ "format \"%u\" of reiserfs with " ++ "non-standard magic", sb_version(rs)); + return 1; + } + } else +@@ -1408,8 +1427,7 @@ static int reread_meta_blocks(struct sup + ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); + wait_on_buffer(SB_BUFFER_WITH_SB(s)); + if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { +- reiserfs_warning(s, +- "reread_meta_blocks, error reading the super"); ++ reiserfs_warning(s, "reiserfs-2504", "error reading the super"); + return 1; + } + +@@ -1473,10 +1491,10 @@ static __u32 find_hash_out(struct super_ + && (yurahash == + GET_HASH_VALUE(deh_offset + 
(&(de.de_deh[de.de_entry_num])))))) { +- reiserfs_warning(s, +- "Unable to automatically detect hash function. " +- "Please mount with -o hash={tea,rupasov,r5}", +- reiserfs_bdevname(s)); ++ reiserfs_warning(s, "reiserfs-2506", "Unable to " ++ "automatically detect hash function. " ++ "Please mount with -o " ++ "hash={tea,rupasov,r5}"); + hash = UNSET_HASH; + break; + } +@@ -1490,7 +1508,8 @@ static __u32 find_hash_out(struct super_ + (deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash) + hash = R5_HASH; + else { +- reiserfs_warning(s, "Unrecognised hash function"); ++ reiserfs_warning(s, "reiserfs-2506", ++ "Unrecognised hash function"); + hash = UNSET_HASH; + } + } while (0); +@@ -1518,17 +1537,20 @@ static int what_hash(struct super_block + ** mount options + */ + if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { +- reiserfs_warning(s, "Error, %s hash detected, " ++ reiserfs_warning(s, "reiserfs-2507", ++ "Error, %s hash detected, " + "unable to force rupasov hash", + reiserfs_hashname(code)); + code = UNSET_HASH; + } else if (reiserfs_tea_hash(s) && code != TEA_HASH) { +- reiserfs_warning(s, "Error, %s hash detected, " ++ reiserfs_warning(s, "reiserfs-2508", ++ "Error, %s hash detected, " + "unable to force tea hash", + reiserfs_hashname(code)); + code = UNSET_HASH; + } else if (reiserfs_r5_hash(s) && code != R5_HASH) { +- reiserfs_warning(s, "Error, %s hash detected, " ++ reiserfs_warning(s, "reiserfs-2509", ++ "Error, %s hash detected, " + "unable to force r5 hash", + reiserfs_hashname(code)); + code = UNSET_HASH; +@@ -1587,9 +1609,9 @@ static int function2code(hashf_t func) + return 0; + } + +-#define SWARN(silent, s, ...) \ ++#define SWARN(silent, s, id, ...) 
\ + if (!(silent)) \ +- reiserfs_warning (s, __VA_ARGS__) ++ reiserfs_warning(s, id, __VA_ARGS__) + + static int reiserfs_fill_super(struct super_block *s, void *data, int silent) + { +@@ -1641,8 +1663,7 @@ static int reiserfs_fill_super(struct su + #endif + + if (blocks) { +- SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option " +- "for remount only"); ++ SWARN(silent, s, "jmacd-7", "resize option for remount only"); + goto error; + } + +@@ -1651,8 +1672,7 @@ static int reiserfs_fill_super(struct su + old_format = 1; + /* try new format (64-th 1k block), which can contain reiserfs super block */ + else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { +- SWARN(silent, s, +- "sh-2021: reiserfs_fill_super: can not find reiserfs on %s", ++ SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", + reiserfs_bdevname(s)); + goto error; + } +@@ -1664,13 +1684,12 @@ static int reiserfs_fill_super(struct su + if (s->s_bdev && s->s_bdev->bd_inode + && i_size_read(s->s_bdev->bd_inode) < + sb_block_count(rs) * sb_blocksize(rs)) { +- SWARN(silent, s, +- "Filesystem on %s cannot be mounted because it is bigger than the device", +- reiserfs_bdevname(s)); +- SWARN(silent, s, +- "You may need to run fsck or increase size of your LVM partition"); +- SWARN(silent, s, +- "Or may be you forgot to reboot after fdisk when it told you to"); ++ SWARN(silent, s, "", "Filesystem cannot be " ++ "mounted because it is bigger than the device"); ++ SWARN(silent, s, "", "You may need to run fsck " ++ "or increase size of your LVM partition"); ++ SWARN(silent, s, "", "Or may be you forgot to " ++ "reboot after fdisk when it told you to"); + goto error; + } + +@@ -1678,14 +1697,13 @@ static int reiserfs_fill_super(struct su + sbi->s_mount_state = REISERFS_VALID_FS; + + if ((errval = reiserfs_init_bitmap_cache(s))) { +- SWARN(silent, s, +- "jmacd-8: reiserfs_fill_super: unable to read bitmap"); ++ SWARN(silent, s, "jmacd-8", "unable to read bitmap"); + goto error; + } + errval 
= -EINVAL; + #ifdef CONFIG_REISERFS_CHECK +- SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON"); +- SWARN(silent, s, "- it is slow mode for debugging."); ++ SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON"); ++ SWARN(silent, s, "", "- it is slow mode for debugging."); + #endif + + /* make data=ordered the default */ +@@ -1706,8 +1724,8 @@ static int reiserfs_fill_super(struct su + } + // set_device_ro(s->s_dev, 1) ; + if (journal_init(s, jdev_name, old_format, commit_max_age)) { +- SWARN(silent, s, +- "sh-2022: reiserfs_fill_super: unable to initialize journal space"); ++ SWARN(silent, s, "sh-2022", ++ "unable to initialize journal space"); + goto error; + } else { + jinit_done = 1; /* once this is set, journal_release must be called +@@ -1715,8 +1733,8 @@ static int reiserfs_fill_super(struct su + */ + } + if (reread_meta_blocks(s)) { +- SWARN(silent, s, +- "jmacd-9: reiserfs_fill_super: unable to reread meta blocks after journal init"); ++ SWARN(silent, s, "jmacd-9", ++ "unable to reread meta blocks after journal init"); + goto error; + } + +@@ -1724,8 +1742,8 @@ static int reiserfs_fill_super(struct su + goto error; + + if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { +- SWARN(silent, s, +- "clm-7000: Detected readonly device, marking FS readonly"); ++ SWARN(silent, s, "clm-7000", ++ "Detected readonly device, marking FS readonly"); + s->s_flags |= MS_RDONLY; + } + args.objectid = REISERFS_ROOT_OBJECTID; +@@ -1734,8 +1752,7 @@ static int reiserfs_fill_super(struct su + iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, + reiserfs_init_locked_inode, (void *)(&args)); + if (!root_inode) { +- SWARN(silent, s, +- "jmacd-10: reiserfs_fill_super: get root inode failed"); ++ SWARN(silent, s, "jmacd-10", "get root inode failed"); + goto error; + } + +@@ -1784,7 +1801,7 @@ static int reiserfs_fill_super(struct su + * avoiding corruption. 
-jeffm */ + if (bmap_would_wrap(reiserfs_bmap_count(s)) && + sb_bmap_nr(rs) != 0) { +- reiserfs_warning(s, "super-2030: This file system " ++ reiserfs_warning(s, "super-2030", "This file system " + "claims to use %u bitmap blocks in " + "its super block, but requires %u. " + "Clearing to zero.", sb_bmap_nr(rs), +@@ -2085,8 +2102,8 @@ static int reiserfs_quota_on(struct supe + if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { + err = reiserfs_unpack(inode, NULL); + if (err) { +- reiserfs_warning(sb, +- "reiserfs: Unpacking tail of quota file failed" ++ reiserfs_warning(sb, "super-6520", ++ "Unpacking tail of quota file failed" + " (%d). Cannot turn on quotas.", err); + err = -EINVAL; + goto out; +@@ -2097,8 +2114,8 @@ static int reiserfs_quota_on(struct supe + if (REISERFS_SB(sb)->s_qf_names[type]) { + /* Quotafile not of fs root? */ + if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) +- reiserfs_warning(sb, +- "reiserfs: Quota file not on filesystem root. " ++ reiserfs_warning(sb, "super-6521", ++ "Quota file not on filesystem root. " + "Journalled quota will not work."); + } + +--- a/fs/reiserfs/tail_conversion.c ++++ b/fs/reiserfs/tail_conversion.c +@@ -48,9 +48,9 @@ int direct2indirect(struct reiserfs_tran + + // FIXME: we could avoid this + if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) { +- reiserfs_warning(sb, "PAP-14030: direct2indirect: " +- "pasted or inserted byte exists in the tree %K. " +- "Use fsck to repair.", &end_key); ++ reiserfs_warning(sb, "PAP-14030", ++ "pasted or inserted byte exists in " ++ "the tree %K. 
Use fsck to repair.", &end_key); + pathrelse(path); + return -EIO; + } +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -259,7 +259,8 @@ static int __xattr_readdir(struct inode + ih = de.de_ih; + + if (!is_direntry_le_ih(ih)) { +- reiserfs_warning(inode->i_sb, "not direntry %h", ih); ++ reiserfs_warning(inode->i_sb, "jdm-20000", ++ "not direntry %h", ih); + break; + } + copy_item_head(&tmp_ih, ih); +@@ -598,7 +599,7 @@ reiserfs_xattr_get(const struct inode *i + if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) { + unlock_page(page); + reiserfs_put_page(page); +- reiserfs_warning(inode->i_sb, ++ reiserfs_warning(inode->i_sb, "jdm-20001", + "Invalid magic for xattr (%s) " + "associated with %k", name, + INODE_PKEY(inode)); +@@ -618,7 +619,7 @@ reiserfs_xattr_get(const struct inode *i + + if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) != + hash) { +- reiserfs_warning(inode->i_sb, ++ reiserfs_warning(inode->i_sb, "jdm-20002", + "Invalid hash for xattr (%s) associated " + "with %k", name, INODE_PKEY(inode)); + err = -EIO; +@@ -652,7 +653,8 @@ __reiserfs_xattr_del(struct dentry *xadi + goto out_file; + + if (!is_reiserfs_priv_object(dentry->d_inode)) { +- reiserfs_warning(dir->i_sb, "OID %08x [%.*s/%.*s] doesn't have " ++ reiserfs_warning(dir->i_sb, "jdm-20003", ++ "OID %08x [%.*s/%.*s] doesn't have " + "priv flag set [parent is %sset].", + le32_to_cpu(INODE_PKEY(dentry->d_inode)-> + k_objectid), xadir->d_name.len, +@@ -750,7 +752,7 @@ int reiserfs_delete_xattrs(struct inode + reiserfs_write_unlock_xattrs(inode->i_sb); + dput(root); + } else { +- reiserfs_warning(inode->i_sb, ++ reiserfs_warning(inode->i_sb, "jdm-20006", + "Couldn't remove all entries in directory"); + } + unlock_kernel(); +@@ -1154,7 +1156,8 @@ int reiserfs_xattr_init(struct super_blo + } else if (reiserfs_xattrs_optional(s)) { + /* Old format filesystem, but optional xattrs have been enabled + * at mount time. Error out. 
*/ +- reiserfs_warning(s, "xattrs/ACLs not supported on pre v3.6 " ++ reiserfs_warning(s, "jdm-20005", ++ "xattrs/ACLs not supported on pre v3.6 " + "format filesystem. Failing mount."); + err = -EOPNOTSUPP; + goto error; +@@ -1201,8 +1204,10 @@ int reiserfs_xattr_init(struct super_blo + /* If we're read-only it just means that the dir hasn't been + * created. Not an error -- just no xattrs on the fs. We'll + * check again if we go read-write */ +- reiserfs_warning(s, "xattrs/ACLs enabled and couldn't " +- "find/create .reiserfs_priv. Failing mount."); ++ reiserfs_warning(s, "jdm-20006", ++ "xattrs/ACLs enabled and couldn't " ++ "find/create .reiserfs_priv. " ++ "Failing mount."); + err = -EOPNOTSUPP; + } + } +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -79,7 +79,10 @@ struct fid; + */ + #define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ + +-void reiserfs_warning(struct super_block *s, const char *fmt, ...); ++void __reiserfs_warning(struct super_block *s, const char *id, ++ const char *func, const char *fmt, ...); ++#define reiserfs_warning(s, id, fmt, args...) \ ++ __reiserfs_warning(s, id, __func__, fmt, ##args) + /* assertions handling */ + + /** always check a condition and panic if it's false. 
*/ +@@ -558,7 +561,7 @@ static inline int uniqueness2type(__u32 + case V1_DIRENTRY_UNIQUENESS: + return TYPE_DIRENTRY; + default: +- reiserfs_warning(NULL, "vs-500: unknown uniqueness %d", ++ reiserfs_warning(NULL, "vs-500", "unknown uniqueness %d", + uniqueness); + case V1_ANY_UNIQUENESS: + return TYPE_ANY; +@@ -578,7 +581,7 @@ static inline __u32 type2uniqueness(int + case TYPE_DIRENTRY: + return V1_DIRENTRY_UNIQUENESS; + default: +- reiserfs_warning(NULL, "vs-501: unknown type %d", type); ++ reiserfs_warning(NULL, "vs-501", "unknown type %d", type); + case TYPE_ANY: + return V1_ANY_UNIQUENESS; + } diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-reiserfs_info.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-reiserfs_info.diff new file mode 100644 index 0000000000..8992e1b4ca --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-reiserfs_info.diff @@ -0,0 +1,87 @@ +From: Jeff Mahoney +Subject: reiserfs: make some warnings informational + + In several places, reiserfs_warning is used when there is no warning, just + a notice. This patch changes some of them to indicate that the message + is merely informational. 
+ +Signed-off-by: Jeff Mahoney + +-- + fs/reiserfs/bitmap.c | 6 +++--- + fs/reiserfs/super.c | 14 ++++++-------- + fs/reiserfs/xattr.c | 10 ++++------ + 3 files changed, 13 insertions(+), 17 deletions(-) + +--- a/fs/reiserfs/bitmap.c ++++ b/fs/reiserfs/bitmap.c +@@ -40,8 +40,8 @@ + + #define SET_OPTION(optname) \ + do { \ +- reiserfs_warning(s, "reiserfs: option \"%s\" is set", #optname); \ +- set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \ ++ reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \ ++ set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \ + } while(0) + #define TEST_OPTION(optname, s) \ + test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)) +@@ -636,7 +636,7 @@ int reiserfs_parse_alloc_options(struct + return 1; + } + +- reiserfs_warning(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s)); ++ reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s)); + return 0; + } + +--- a/fs/reiserfs/super.c ++++ b/fs/reiserfs/super.c +@@ -1369,13 +1369,11 @@ static int read_super_block(struct super + /* magic is of non-standard journal filesystem, look at s_version to + find which format is in use */ + if (sb_version(rs) == REISERFS_VERSION_2) +- reiserfs_warning(s, +- "read_super_block: found reiserfs format \"3.6\"" +- " with non-standard journal"); ++ reiserfs_info(s, "found reiserfs format \"3.6\"" ++ " with non-standard journal\n"); + else if (sb_version(rs) == REISERFS_VERSION_1) +- reiserfs_warning(s, +- "read_super_block: found reiserfs format \"3.5\"" +- " with non-standard journal"); ++ reiserfs_info(s, "found reiserfs format \"3.5\"" ++ " with non-standard journal\n"); + else { + reiserfs_warning(s, + "sh-2012: read_super_block: found unknown " +@@ -1454,8 +1452,8 @@ static __u32 find_hash_out(struct super_ + if (reiserfs_rupasov_hash(s)) { + hash = YURA_HASH; + } +- reiserfs_warning(s, "FS seems to be empty, autodetect " +- "is using the default hash"); ++ reiserfs_info(s, "FS seems to be empty, autodetect " ++ "is using the 
default hash\n"); + break; + } + r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)); +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -1182,12 +1182,10 @@ int reiserfs_xattr_init(struct super_blo + } + + if (dentry && dentry->d_inode) +- reiserfs_warning(s, +- "Created %s on %s - reserved for " +- "xattr storage.", +- PRIVROOT_NAME, +- reiserfs_bdevname +- (inode->i_sb)); ++ reiserfs_info(s, "Created %s - " ++ "reserved for xattr " ++ "storage.\n", ++ PRIVROOT_NAME); + } else if (!dentry->d_inode) { + dput(dentry); + dentry = NULL; diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-reiserfs_panic.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-reiserfs_panic.diff new file mode 100644 index 0000000000..482b43a482 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-reiserfs_panic.diff @@ -0,0 +1,970 @@ +From: Jeff Mahoney +Subject: reiserfs: rework reiserfs_panic + + ReiserFS panics can be somewhat inconsistent. + In some cases: + * a unique identifier may be associated with it + * the function name may be included + * the device may be printed separately + + This patch aims to make warnings more consistent. reiserfs_warning() prints + the device name, so printing it a second time is not required. The function + name for a warning is always helpful in debugging, so it is now automatically + inserted into the output. Hans has stated that every warning should have + a unique identifier. Some cases lack them, others really shouldn't have them. + reiserfs_warning() now expects an id associated with each message. In the + rare case where one isn't needed, "" will suffice. 
+ +Signed-off-by: Jeff Mahoney + +--- + fs/reiserfs/do_balan.c | 67 +++++++++++++++++++++-------------------- + fs/reiserfs/fix_node.c | 68 +++++++++++++++++++++--------------------- + fs/reiserfs/ibalance.c | 12 +++---- + fs/reiserfs/inode.c | 3 - + fs/reiserfs/item_ops.c | 8 +++- + fs/reiserfs/journal.c | 57 +++++++++++++++++------------------ + fs/reiserfs/lbalance.c | 27 +++++++++------- + fs/reiserfs/namei.c | 18 ++++------- + fs/reiserfs/objectid.c | 3 - + fs/reiserfs/prints.c | 33 +++++++++----------- + fs/reiserfs/stree.c | 49 ++++++++++++++---------------- + fs/reiserfs/tail_conversion.c | 10 ++---- + include/linux/reiserfs_fs.h | 28 +++++++++++++---- + 13 files changed, 200 insertions(+), 183 deletions(-) + +--- a/fs/reiserfs/do_balan.c ++++ b/fs/reiserfs/do_balan.c +@@ -153,8 +153,8 @@ static int balance_leaf_when_delete(stru + + default: + print_cur_tb("12040"); +- reiserfs_panic(tb->tb_sb, +- "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)", ++ reiserfs_panic(tb->tb_sb, "PAP-12040", ++ "unexpected mode: %s(%d)", + (flag == + M_PASTE) ? "PASTE" : ((flag == + M_INSERT) ? "INSERT" : +@@ -721,8 +721,9 @@ static int balance_leaf(struct tree_bala + } + break; + default: /* cases d and t */ +- reiserfs_panic(tb->tb_sb, +- "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)", ++ reiserfs_panic(tb->tb_sb, "PAP-12130", ++ "lnum > 0: unexpected mode: " ++ " %s(%d)", + (flag == + M_DELETE) ? "DELETE" : ((flag == + M_CUT) +@@ -1134,8 +1135,8 @@ static int balance_leaf(struct tree_bala + } + break; + default: /* cases d and t */ +- reiserfs_panic(tb->tb_sb, +- "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)", ++ reiserfs_panic(tb->tb_sb, "PAP-12175", ++ "rnum > 0: unexpected mode: %s(%d)", + (flag == + M_DELETE) ? "DELETE" : ((flag == + M_CUT) ? 
"CUT" +@@ -1165,8 +1166,8 @@ static int balance_leaf(struct tree_bala + not set correctly */ + if (tb->CFL[0]) { + if (!tb->CFR[0]) +- reiserfs_panic(tb->tb_sb, +- "vs-12195: balance_leaf: CFR not initialized"); ++ reiserfs_panic(tb->tb_sb, "vs-12195", ++ "CFR not initialized"); + copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), + B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])); + do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); +@@ -1472,7 +1473,10 @@ static int balance_leaf(struct tree_bala + && (pos_in_item != ih_item_len(ih_check) + || tb->insert_size[0] <= 0)) + reiserfs_panic(tb->tb_sb, +- "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len"); ++ "PAP-12235", ++ "pos_in_item " ++ "must be equal " ++ "to ih_item_len"); + #endif /* CONFIG_REISERFS_CHECK */ + + leaf_mi = +@@ -1532,8 +1536,8 @@ static int balance_leaf(struct tree_bala + } + break; + default: /* cases d and t */ +- reiserfs_panic(tb->tb_sb, +- "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)", ++ reiserfs_panic(tb->tb_sb, "PAP-12245", ++ "blknum > 2: unexpected mode: %s(%d)", + (flag == + M_DELETE) ? "DELETE" : ((flag == + M_CUT) ? 
"CUT" +@@ -1678,10 +1682,11 @@ static int balance_leaf(struct tree_bala + print_cur_tb("12285"); + reiserfs_panic(tb-> + tb_sb, +- "PAP-12285: balance_leaf: insert_size must be 0 (%d)", +- tb-> +- insert_size +- [0]); ++ "PAP-12285", ++ "insert_size " ++ "must be 0 " ++ "(%d)", ++ tb->insert_size[0]); + } + } + #endif /* CONFIG_REISERFS_CHECK */ +@@ -1694,11 +1699,10 @@ static int balance_leaf(struct tree_bala + if (flag == M_PASTE && tb->insert_size[0]) { + print_cur_tb("12290"); + reiserfs_panic(tb->tb_sb, +- "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", ++ "PAP-12290", "insert_size is still not 0 (%d)", + tb->insert_size[0]); + } + #endif /* CONFIG_REISERFS_CHECK */ +- + return 0; + } /* Leaf level of the tree is balanced (end of balance_leaf) */ + +@@ -1729,8 +1733,7 @@ struct buffer_head *get_FEB(struct tree_ + break; + + if (i == MAX_FEB_SIZE) +- reiserfs_panic(tb->tb_sb, +- "vs-12300: get_FEB: FEB list is empty"); ++ reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty"); + + bi.tb = tb; + bi.bi_bh = first_b = tb->FEB[i]; +@@ -1871,8 +1874,8 @@ static void check_internal_node(struct s + for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) { + if (!is_reusable(s, dc_block_number(dc), 1)) { + print_cur_tb(mes); +- reiserfs_panic(s, +- "PAP-12338: check_internal_node: invalid child pointer %y in %b", ++ reiserfs_panic(s, "PAP-12338", ++ "invalid child pointer %y in %b", + dc, bh); + } + } +@@ -1894,9 +1897,10 @@ static int check_before_balancing(struct + int retval = 0; + + if (cur_tb) { +- reiserfs_panic(tb->tb_sb, "vs-12335: check_before_balancing: " +- "suspect that schedule occurred based on cur_tb not being null at this point in code. " +- "do_balance cannot properly handle schedule occurring while it runs."); ++ reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule " ++ "occurred based on cur_tb not being null at " ++ "this point in code. 
do_balance cannot properly " ++ "handle schedule occurring while it runs."); + } + + /* double check that buffers that we will modify are unlocked. (fix_nodes should already have +@@ -1928,8 +1932,8 @@ static void check_after_balance_leaf(str + dc_size(B_N_CHILD + (tb->FL[0], get_left_neighbor_position(tb, 0)))) { + print_cur_tb("12221"); +- reiserfs_panic(tb->tb_sb, +- "PAP-12355: check_after_balance_leaf: shift to left was incorrect"); ++ reiserfs_panic(tb->tb_sb, "PAP-12355", ++ "shift to left was incorrect"); + } + } + if (tb->rnum[0]) { +@@ -1938,8 +1942,8 @@ static void check_after_balance_leaf(str + dc_size(B_N_CHILD + (tb->FR[0], get_right_neighbor_position(tb, 0)))) { + print_cur_tb("12222"); +- reiserfs_panic(tb->tb_sb, +- "PAP-12360: check_after_balance_leaf: shift to right was incorrect"); ++ reiserfs_panic(tb->tb_sb, "PAP-12360", ++ "shift to right was incorrect"); + } + } + if (PATH_H_PBUFFER(tb->tb_path, 1) && +@@ -1964,8 +1968,7 @@ static void check_after_balance_leaf(str + (PATH_H_PBUFFER(tb->tb_path, 1), + PATH_H_POSITION(tb->tb_path, 1))), + right); +- reiserfs_panic(tb->tb_sb, +- "PAP-12365: check_after_balance_leaf: S is incorrect"); ++ reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect"); + } + } + +@@ -2100,8 +2103,8 @@ void do_balance(struct tree_balance *tb, + tb->need_balance_dirty = 0; + + if (FILESYSTEM_CHANGED_TB(tb)) { +- reiserfs_panic(tb->tb_sb, +- "clm-6000: do_balance, fs generation has changed\n"); ++ reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has " ++ "changed"); + } + /* if we have no real work to do */ + if (!tb->insert_size[0]) { +--- a/fs/reiserfs/fix_node.c ++++ b/fs/reiserfs/fix_node.c +@@ -135,8 +135,7 @@ static void create_virtual_node(struct t + vn->vn_free_ptr += + op_create_vi(vn, vi, is_affected, tb->insert_size[0]); + if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) +- reiserfs_panic(tb->tb_sb, +- "vs-8030: create_virtual_node: " ++ reiserfs_panic(tb->tb_sb, "vs-8030", + "virtual node space 
consumed"); + + if (!is_affected) +@@ -186,8 +185,9 @@ static void create_virtual_node(struct t + && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) { + /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */ + print_block(Sh, 0, -1, -1); +- reiserfs_panic(tb->tb_sb, +- "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c", ++ reiserfs_panic(tb->tb_sb, "vs-8045", ++ "rdkey %k, affected item==%d " ++ "(mode==%c) Must be %c", + key, vn->vn_affected_item_num, + vn->vn_mode, M_DELETE); + } +@@ -1255,8 +1255,8 @@ static int ip_check_balance(struct tree_ + /* Calculate balance parameters for creating new root. */ + if (!Sh) { + if (!h) +- reiserfs_panic(tb->tb_sb, +- "vs-8210: ip_check_balance: S[0] can not be 0"); ++ reiserfs_panic(tb->tb_sb, "vs-8210", ++ "S[0] can not be 0"); + switch (n_ret_value = get_empty_nodes(tb, h)) { + case CARRY_ON: + set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); +@@ -1266,8 +1266,8 @@ static int ip_check_balance(struct tree_ + case REPEAT_SEARCH: + return n_ret_value; + default: +- reiserfs_panic(tb->tb_sb, +- "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes"); ++ reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect " ++ "return value of get_empty_nodes"); + } + } + +@@ -2095,38 +2095,38 @@ static void tb_buffer_sanity_check(struc + if (p_s_bh) { + if (atomic_read(&(p_s_bh->b_count)) <= 0) { + +- reiserfs_panic(p_s_sb, +- "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n", +- descr, level, p_s_bh); ++ reiserfs_panic(p_s_sb, "jmacd-1", "negative or zero " ++ "reference counter for buffer %s[%d] " ++ "(%b)", descr, level, p_s_bh); + } + + if (!buffer_uptodate(p_s_bh)) { +- reiserfs_panic(p_s_sb, +- "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n", ++ reiserfs_panic(p_s_sb, "jmacd-2", "buffer is not up " ++ "to date %s[%d] (%b)", + descr, level, p_s_bh); + } + + if 
(!B_IS_IN_TREE(p_s_bh)) { +- reiserfs_panic(p_s_sb, +- "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n", ++ reiserfs_panic(p_s_sb, "jmacd-3", "buffer is not " ++ "in tree %s[%d] (%b)", + descr, level, p_s_bh); + } + + if (p_s_bh->b_bdev != p_s_sb->s_bdev) { +- reiserfs_panic(p_s_sb, +- "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n", ++ reiserfs_panic(p_s_sb, "jmacd-4", "buffer has wrong " ++ "device %s[%d] (%b)", + descr, level, p_s_bh); + } + + if (p_s_bh->b_size != p_s_sb->s_blocksize) { +- reiserfs_panic(p_s_sb, +- "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n", ++ reiserfs_panic(p_s_sb, "jmacd-5", "buffer has wrong " ++ "blocksize %s[%d] (%b)", + descr, level, p_s_bh); + } + + if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { +- reiserfs_panic(p_s_sb, +- "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n", ++ reiserfs_panic(p_s_sb, "jmacd-6", "buffer block " ++ "number too high %s[%d] (%b)", + descr, level, p_s_bh); + } + } +@@ -2358,14 +2358,14 @@ int fix_nodes(int n_op_mode, struct tree + #ifdef CONFIG_REISERFS_CHECK + if (cur_tb) { + print_cur_tb("fix_nodes"); +- reiserfs_panic(p_s_tb->tb_sb, +- "PAP-8305: fix_nodes: there is pending do_balance"); ++ reiserfs_panic(p_s_tb->tb_sb, "PAP-8305", ++ "there is pending do_balance"); + } + + if (!buffer_uptodate(p_s_tbS0) || !B_IS_IN_TREE(p_s_tbS0)) { +- reiserfs_panic(p_s_tb->tb_sb, +- "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate " +- "at the beginning of fix_nodes or not in tree (mode %c)", ++ reiserfs_panic(p_s_tb->tb_sb, "PAP-8320", "S[0] (%b %z) is " ++ "not uptodate at the beginning of fix_nodes " ++ "or not in tree (mode %c)", + p_s_tbS0, p_s_tbS0, n_op_mode); + } + +@@ -2373,24 +2373,26 @@ int fix_nodes(int n_op_mode, struct tree + switch (n_op_mode) { + case M_INSERT: + if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0)) +- reiserfs_panic(p_s_tb->tb_sb, +- "PAP-8330: fix_nodes: 
Incorrect item number %d (in S0 - %d) in case of insert", +- n_item_num, B_NR_ITEMS(p_s_tbS0)); ++ reiserfs_panic(p_s_tb->tb_sb, "PAP-8330", "Incorrect " ++ "item number %d (in S0 - %d) in case " ++ "of insert", n_item_num, ++ B_NR_ITEMS(p_s_tbS0)); + break; + case M_PASTE: + case M_DELETE: + case M_CUT: + if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0)) { + print_block(p_s_tbS0, 0, -1, -1); +- reiserfs_panic(p_s_tb->tb_sb, +- "PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n", ++ reiserfs_panic(p_s_tb->tb_sb, "PAP-8335", "Incorrect " ++ "item number(%d); mode = %c " ++ "insert_size = %d", + n_item_num, n_op_mode, + p_s_tb->insert_size[0]); + } + break; + default: +- reiserfs_panic(p_s_tb->tb_sb, +- "PAP-8340: fix_nodes: Incorrect mode of operation"); ++ reiserfs_panic(p_s_tb->tb_sb, "PAP-8340", "Incorrect mode " ++ "of operation"); + } + #endif + +--- a/fs/reiserfs/ibalance.c ++++ b/fs/reiserfs/ibalance.c +@@ -105,8 +105,8 @@ static void internal_define_dest_src_inf + break; + + default: +- reiserfs_panic(tb->tb_sb, +- "internal_define_dest_src_infos: shift type is unknown (%d)", ++ reiserfs_panic(tb->tb_sb, "ibalance-1", ++ "shift type is unknown (%d)", + shift_mode); + } + } +@@ -702,8 +702,8 @@ static void balance_internal_when_delete + + return; + } +- reiserfs_panic(tb->tb_sb, +- "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d", ++ reiserfs_panic(tb->tb_sb, "ibalance-2", ++ "unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d", + h, tb->lnum[h], h, tb->rnum[h]); + } + +@@ -940,8 +940,8 @@ int balance_internal(struct tree_balance + struct block_head *blkh; + + if (tb->blknum[h] != 1) +- reiserfs_panic(NULL, +- "balance_internal: One new node required for creating the new root"); ++ reiserfs_panic(NULL, "ibalance-3", "One new node " ++ "required for creating the new root"); + /* S[h] = empty buffer from the list FEB. 
*/ + tbSh = get_FEB(tb); + blkh = B_BLK_HEAD(tbSh); +--- a/fs/reiserfs/inode.c ++++ b/fs/reiserfs/inode.c +@@ -1300,8 +1300,7 @@ static void update_stat_data(struct tree + ih = PATH_PITEM_HEAD(path); + + if (!is_statdata_le_ih(ih)) +- reiserfs_panic(inode->i_sb, +- "vs-13065: update_stat_data: key %k, found item %h", ++ reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h", + INODE_PKEY(inode), ih); + + if (stat_data_v1(ih)) { +--- a/fs/reiserfs/item_ops.c ++++ b/fs/reiserfs/item_ops.c +@@ -517,8 +517,9 @@ static int direntry_create_vi(struct vir + ((is_affected + && (vn->vn_mode == M_PASTE + || vn->vn_mode == M_CUT)) ? insert_size : 0)) { +- reiserfs_panic(NULL, +- "vs-8025: set_entry_sizes: (mode==%c, insert_size==%d), invalid length of directory item", ++ reiserfs_panic(NULL, "vs-8025", "(mode==%c, " ++ "insert_size==%d), invalid length of " ++ "directory item", + vn->vn_mode, insert_size); + } + } +@@ -549,7 +550,8 @@ static int direntry_check_left(struct vi + } + + if (entries == dir_u->entry_count) { +- reiserfs_panic(NULL, "free space %d, entry_count %d\n", free, ++ reiserfs_panic(NULL, "item_ops-1", ++ "free space %d, entry_count %d", free, + dir_u->entry_count); + } + +--- a/fs/reiserfs/journal.c ++++ b/fs/reiserfs/journal.c +@@ -436,8 +436,8 @@ void reiserfs_check_lock_depth(struct su + { + #ifdef CONFIG_SMP + if (current->lock_depth < 0) { +- reiserfs_panic(sb, "%s called without kernel lock held", +- caller); ++ reiserfs_panic(sb, "journal-1", "%s called without kernel " ++ "lock held", caller); + } + #else + ; +@@ -574,7 +574,7 @@ static inline void put_journal_list(stru + struct reiserfs_journal_list *jl) + { + if (jl->j_refcount < 1) { +- reiserfs_panic(s, "trans id %u, refcount at %d", ++ reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d", + jl->j_trans_id, jl->j_refcount); + } + if (--jl->j_refcount == 0) +@@ -1416,8 +1416,7 @@ static int flush_journal_list(struct sup + + count = 0; + if (j_len_saved > journal->j_trans_max) { +- 
reiserfs_panic(s, +- "journal-715: flush_journal_list, length is %lu, trans id %lu\n", ++ reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu", + j_len_saved, jl->j_trans_id); + return 0; + } +@@ -1449,8 +1448,8 @@ static int flush_journal_list(struct sup + ** or wait on a more recent transaction, or just ignore it + */ + if (atomic_read(&(journal->j_wcount)) != 0) { +- reiserfs_panic(s, +- "journal-844: panic journal list is flushing, wcount is not 0\n"); ++ reiserfs_panic(s, "journal-844", "journal list is flushing, " ++ "wcount is not 0"); + } + cn = jl->j_realblock; + while (cn) { +@@ -1551,13 +1550,13 @@ static int flush_journal_list(struct sup + while (cn) { + if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { + if (!cn->bh) { +- reiserfs_panic(s, +- "journal-1011: cn->bh is NULL\n"); ++ reiserfs_panic(s, "journal-1011", ++ "cn->bh is NULL"); + } + wait_on_buffer(cn->bh); + if (!cn->bh) { +- reiserfs_panic(s, +- "journal-1012: cn->bh is NULL\n"); ++ reiserfs_panic(s, "journal-1012", ++ "cn->bh is NULL"); + } + if (unlikely(!buffer_uptodate(cn->bh))) { + #ifdef CONFIG_REISERFS_CHECK +@@ -3252,8 +3251,8 @@ int journal_mark_dirty(struct reiserfs_t + + PROC_INFO_INC(p_s_sb, journal.mark_dirty); + if (th->t_trans_id != journal->j_trans_id) { +- reiserfs_panic(th->t_super, +- "journal-1577: handle trans id %ld != current trans id %ld\n", ++ reiserfs_panic(th->t_super, "journal-1577", ++ "handle trans id %ld != current trans id %ld", + th->t_trans_id, journal->j_trans_id); + } + +@@ -3292,8 +3291,8 @@ int journal_mark_dirty(struct reiserfs_t + ** Nothing can be done here, except make the FS readonly or panic. 
+ */ + if (journal->j_len >= journal->j_trans_max) { +- reiserfs_panic(th->t_super, +- "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n", ++ reiserfs_panic(th->t_super, "journal-1413", ++ "j_len (%lu) is too big", + journal->j_len); + } + +@@ -3313,7 +3312,8 @@ int journal_mark_dirty(struct reiserfs_t + if (!cn) { + cn = get_cnode(p_s_sb); + if (!cn) { +- reiserfs_panic(p_s_sb, "get_cnode failed!\n"); ++ reiserfs_panic(p_s_sb, "journal-4", ++ "get_cnode failed!"); + } + + if (th->t_blocks_logged == th->t_blocks_allocated) { +@@ -3581,8 +3581,8 @@ static int check_journal_end(struct reis + BUG_ON(!th->t_trans_id); + + if (th->t_trans_id != journal->j_trans_id) { +- reiserfs_panic(th->t_super, +- "journal-1577: handle trans id %ld != current trans id %ld\n", ++ reiserfs_panic(th->t_super, "journal-1577", ++ "handle trans id %ld != current trans id %ld", + th->t_trans_id, journal->j_trans_id); + } + +@@ -3661,8 +3661,8 @@ static int check_journal_end(struct reis + } + + if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { +- reiserfs_panic(p_s_sb, +- "journal-003: journal_end: j_start (%ld) is too high\n", ++ reiserfs_panic(p_s_sb, "journal-003", ++ "j_start (%ld) is too high", + journal->j_start); + } + return 1; +@@ -3707,8 +3707,8 @@ int journal_mark_freed(struct reiserfs_t + /* set the bit for this block in the journal bitmap for this transaction */ + jb = journal->j_current_jl->j_list_bitmap; + if (!jb) { +- reiserfs_panic(p_s_sb, +- "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n"); ++ reiserfs_panic(p_s_sb, "journal-1702", ++ "journal_list_bitmap is NULL"); + } + set_bit_in_list_bitmap(p_s_sb, blocknr, jb); + +@@ -4063,8 +4063,8 @@ static int do_journal_end(struct reiserf + if (buffer_journaled(cn->bh)) { + jl_cn = get_cnode(p_s_sb); + if (!jl_cn) { +- reiserfs_panic(p_s_sb, +- "journal-1676, get_cnode returned NULL\n"); ++ reiserfs_panic(p_s_sb, "journal-1676", ++ "get_cnode returned NULL"); + } + if (i == 0) { + jl->j_realblock 
= jl_cn; +@@ -4080,8 +4080,9 @@ static int do_journal_end(struct reiserf + + if (is_block_in_log_or_reserved_area + (p_s_sb, cn->bh->b_blocknr)) { +- reiserfs_panic(p_s_sb, +- "journal-2332: Trying to log block %lu, which is a log block\n", ++ reiserfs_panic(p_s_sb, "journal-2332", ++ "Trying to log block %lu, " ++ "which is a log block", + cn->bh->b_blocknr); + } + jl_cn->blocknr = cn->bh->b_blocknr; +@@ -4265,8 +4266,8 @@ static int do_journal_end(struct reiserf + get_list_bitmap(p_s_sb, journal->j_current_jl); + + if (!(journal->j_current_jl->j_list_bitmap)) { +- reiserfs_panic(p_s_sb, +- "journal-1996: do_journal_end, could not get a list bitmap\n"); ++ reiserfs_panic(p_s_sb, "journal-1996", ++ "could not get a list bitmap"); + } + + atomic_set(&(journal->j_jlock), 0); +--- a/fs/reiserfs/lbalance.c ++++ b/fs/reiserfs/lbalance.c +@@ -168,10 +168,11 @@ static int leaf_copy_boundary_item(struc + if (bytes_or_entries == ih_item_len(ih) + && is_indirect_le_ih(ih)) + if (get_ih_free_space(ih)) +- reiserfs_panic(NULL, +- "vs-10020: leaf_copy_boundary_item: " +- "last unformatted node must be filled entirely (%h)", +- ih); ++ reiserfs_panic(sb_from_bi(dest_bi), ++ "vs-10020", ++ "last unformatted node " ++ "must be filled " ++ "entirely (%h)", ih); + } + #endif + +@@ -622,9 +623,8 @@ static void leaf_define_dest_src_infos(i + break; + + default: +- reiserfs_panic(NULL, +- "vs-10250: leaf_define_dest_src_infos: shift type is unknown (%d)", +- shift_mode); ++ reiserfs_panic(sb_from_bi(src_bi), "vs-10250", ++ "shift type is unknown (%d)", shift_mode); + } + RFALSE(!src_bi->bi_bh || !dest_bi->bi_bh, + "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly", +@@ -674,9 +674,9 @@ int leaf_shift_left(struct tree_balance + #ifdef CONFIG_REISERFS_CHECK + if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) { + print_cur_tb("vs-10275"); +- reiserfs_panic(tb->tb_sb, +- "vs-10275: leaf_shift_left: balance condition corrupted (%c)", +- tb->tb_mode); ++ 
reiserfs_panic(tb->tb_sb, "vs-10275", ++ "balance condition corrupted " ++ "(%c)", tb->tb_mode); + } + #endif + +@@ -889,9 +889,12 @@ void leaf_paste_in_buffer(struct buffer_ + + #ifdef CONFIG_REISERFS_CHECK + if (zeros_number > paste_size) { ++ struct super_block *sb = NULL; ++ if (bi && bi->tb) ++ sb = bi->tb->tb_sb; + print_cur_tb("10177"); +- reiserfs_panic(NULL, +- "vs-10177: leaf_paste_in_buffer: ero number == %d, paste_size == %d", ++ reiserfs_panic(sb, "vs-10177", ++ "zeros_number == %d, paste_size == %d", + zeros_number, paste_size); + } + #endif /* CONFIG_REISERFS_CHECK */ +--- a/fs/reiserfs/namei.c ++++ b/fs/reiserfs/namei.c +@@ -145,10 +145,9 @@ int search_by_entry_key(struct super_blo + if (!is_direntry_le_ih(de->de_ih) || + COMP_SHORT_KEYS(&(de->de_ih->ih_key), key)) { + print_block(de->de_bh, 0, -1, -1); +- reiserfs_panic(sb, +- "vs-7005: search_by_entry_key: found item %h is not directory item or " +- "does not belong to the same directory as key %K", +- de->de_ih, key); ++ reiserfs_panic(sb, "vs-7005", "found item %h is not directory " ++ "item or does not belong to the same directory " ++ "as key %K", de->de_ih, key); + } + #endif /* CONFIG_REISERFS_CHECK */ + +@@ -1194,15 +1193,14 @@ static int entry_points_to_object(const + + if (inode) { + if (!de_visible(de->de_deh + de->de_entry_num)) +- reiserfs_panic(NULL, +- "vs-7042: entry_points_to_object: entry must be visible"); ++ reiserfs_panic(inode->i_sb, "vs-7042", ++ "entry must be visible"); + return (de->de_objectid == inode->i_ino) ? 
1 : 0; + } + + /* this must be added hidden entry */ + if (de_visible(de->de_deh + de->de_entry_num)) +- reiserfs_panic(NULL, +- "vs-7043: entry_points_to_object: entry must be visible"); ++ reiserfs_panic(NULL, "vs-7043", "entry must be visible"); + + return 1; + } +@@ -1316,8 +1314,8 @@ static int reiserfs_rename(struct inode + new_dentry->d_name.len, old_inode, 0); + if (retval == -EEXIST) { + if (!new_dentry_inode) { +- reiserfs_panic(old_dir->i_sb, +- "vs-7050: new entry is found, new inode == 0\n"); ++ reiserfs_panic(old_dir->i_sb, "vs-7050", ++ "new entry is found, new inode == 0"); + } + } else if (retval) { + int err = journal_end(&th, old_dir->i_sb, jbegin_count); +--- a/fs/reiserfs/objectid.c ++++ b/fs/reiserfs/objectid.c +@@ -18,8 +18,7 @@ + static void check_objectid_map(struct super_block *s, __le32 * map) + { + if (le32_to_cpu(map[0]) != 1) +- reiserfs_panic(s, +- "vs-15010: check_objectid_map: map corrupted: %lx", ++ reiserfs_panic(s, "vs-15010", "map corrupted: %lx", + (long unsigned int)le32_to_cpu(map[0])); + + // FIXME: add something else here +--- a/fs/reiserfs/prints.c ++++ b/fs/reiserfs/prints.c +@@ -356,14 +356,21 @@ void reiserfs_debug(struct super_block * + extern struct tree_balance *cur_tb; + #endif + +-void reiserfs_panic(struct super_block *sb, const char *fmt, ...) ++void __reiserfs_panic(struct super_block *sb, const char *id, ++ const char *function, const char *fmt, ...) + { + do_reiserfs_warning(fmt); + ++#ifdef CONFIG_REISERFS_CHECK + dump_stack(); +- +- panic(KERN_EMERG "REISERFS: panic (device %s): %s\n", +- reiserfs_bdevname(sb), error_buf); ++#endif ++ if (sb) ++ panic(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n", ++ sb->s_id, id ? id : "", id ? " " : "", ++ function, error_buf); ++ else ++ panic(KERN_WARNING "REISERFS panic: %s%s%s: %s\n", ++ id ? id : "", id ? " " : "", function, error_buf); + } + + void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) 
+@@ -684,12 +691,10 @@ static void check_leaf_block_head(struct + blkh = B_BLK_HEAD(bh); + nr = blkh_nr_item(blkh); + if (nr > (bh->b_size - BLKH_SIZE) / IH_SIZE) +- reiserfs_panic(NULL, +- "vs-6010: check_leaf_block_head: invalid item number %z", ++ reiserfs_panic(NULL, "vs-6010", "invalid item number %z", + bh); + if (blkh_free_space(blkh) > bh->b_size - BLKH_SIZE - IH_SIZE * nr) +- reiserfs_panic(NULL, +- "vs-6020: check_leaf_block_head: invalid free space %z", ++ reiserfs_panic(NULL, "vs-6020", "invalid free space %z", + bh); + + } +@@ -700,21 +705,15 @@ static void check_internal_block_head(st + + blkh = B_BLK_HEAD(bh); + if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT)) +- reiserfs_panic(NULL, +- "vs-6025: check_internal_block_head: invalid level %z", +- bh); ++ reiserfs_panic(NULL, "vs-6025", "invalid level %z", bh); + + if (B_NR_ITEMS(bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE) +- reiserfs_panic(NULL, +- "vs-6030: check_internal_block_head: invalid item number %z", +- bh); ++ reiserfs_panic(NULL, "vs-6030", "invalid item number %z", bh); + + if (B_FREE_SPACE(bh) != + bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS(bh) - + DC_SIZE * (B_NR_ITEMS(bh) + 1)) +- reiserfs_panic(NULL, +- "vs-6040: check_internal_block_head: invalid free space %z", +- bh); ++ reiserfs_panic(NULL, "vs-6040", "invalid free space %z", bh); + + } + +--- a/fs/reiserfs/stree.c ++++ b/fs/reiserfs/stree.c +@@ -366,9 +366,8 @@ inline void decrement_bcount(struct buff + put_bh(p_s_bh); + return; + } +- reiserfs_panic(NULL, +- "PAP-5070: decrement_bcount: trying to free free buffer %b", +- p_s_bh); ++ reiserfs_panic(NULL, "PAP-5070", ++ "trying to free free buffer %b", p_s_bh); + } + } + +@@ -713,8 +712,8 @@ int search_by_key(struct super_block *p_ + #ifdef CONFIG_REISERFS_CHECK + if (cur_tb) { + print_cur_tb("5140"); +- reiserfs_panic(p_s_sb, +- "PAP-5140: search_by_key: schedule occurred in do_balance!"); ++ reiserfs_panic(p_s_sb, "PAP-5140", ++ "schedule occurred in 
do_balance!"); + } + #endif + +@@ -1511,8 +1510,8 @@ static void indirect_to_direct_roll_back + /* look for the last byte of the tail */ + if (search_for_position_by_key(inode->i_sb, &tail_key, path) == + POSITION_NOT_FOUND) +- reiserfs_panic(inode->i_sb, +- "vs-5615: indirect_to_direct_roll_back: found invalid item"); ++ reiserfs_panic(inode->i_sb, "vs-5615", ++ "found invalid item"); + RFALSE(path->pos_in_item != + ih_item_len(PATH_PITEM_HEAD(path)) - 1, + "vs-5616: appended bytes found"); +@@ -1612,8 +1611,8 @@ int reiserfs_cut_from_item(struct reiser + print_block(PATH_PLAST_BUFFER(p_s_path), 3, + PATH_LAST_POSITION(p_s_path) - 1, + PATH_LAST_POSITION(p_s_path) + 1); +- reiserfs_panic(p_s_sb, +- "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)", ++ reiserfs_panic(p_s_sb, "PAP-5580", "item to " ++ "convert does not exist (%K)", + p_s_item_key); + } + continue; +@@ -1693,22 +1692,20 @@ int reiserfs_cut_from_item(struct reiser + sure, that we exactly remove last unformatted node pointer + of the item */ + if (!is_indirect_le_ih(le_ih)) +- reiserfs_panic(p_s_sb, +- "vs-5652: reiserfs_cut_from_item: " ++ reiserfs_panic(p_s_sb, "vs-5652", + "item must be indirect %h", le_ih); + + if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) +- reiserfs_panic(p_s_sb, +- "vs-5653: reiserfs_cut_from_item: " +- "completing indirect2direct conversion indirect item %h " +- "being deleted must be of 4 byte long", +- le_ih); ++ reiserfs_panic(p_s_sb, "vs-5653", "completing " ++ "indirect2direct conversion indirect " ++ "item %h being deleted must be of " ++ "4 byte long", le_ih); + + if (c_mode == M_CUT + && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { +- reiserfs_panic(p_s_sb, +- "vs-5654: reiserfs_cut_from_item: " +- "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)", ++ reiserfs_panic(p_s_sb, "vs-5654", "can not complete " ++ "indirect2direct conversion of %h " ++ "(CUT, insert_size==%d)", + le_ih, 
s_cut_balance.insert_size[0]); + } + /* it would be useful to make sure, that right neighboring +@@ -1923,10 +1920,10 @@ static void check_research_for_paste(str + || op_bytes_number(found_ih, + get_last_bh(path)->b_size) != + pos_in_item(path)) +- reiserfs_panic(NULL, +- "PAP-5720: check_research_for_paste: " +- "found direct item %h or position (%d) does not match to key %K", +- found_ih, pos_in_item(path), p_s_key); ++ reiserfs_panic(NULL, "PAP-5720", "found direct item " ++ "%h or position (%d) does not match " ++ "to key %K", found_ih, ++ pos_in_item(path), p_s_key); + } + if (is_indirect_le_ih(found_ih)) { + if (le_ih_k_offset(found_ih) + +@@ -1935,9 +1932,9 @@ static void check_research_for_paste(str + cpu_key_k_offset(p_s_key) + || I_UNFM_NUM(found_ih) != pos_in_item(path) + || get_ih_free_space(found_ih) != 0) +- reiserfs_panic(NULL, +- "PAP-5730: check_research_for_paste: " +- "found indirect item (%h) or position (%d) does not match to key (%K)", ++ reiserfs_panic(NULL, "PAP-5730", "found indirect " ++ "item (%h) or position (%d) does not " ++ "match to key (%K)", + found_ih, pos_in_item(path), p_s_key); + } + } +--- a/fs/reiserfs/tail_conversion.c ++++ b/fs/reiserfs/tail_conversion.c +@@ -92,8 +92,7 @@ int direct2indirect(struct reiserfs_tran + last item of the file */ + if (search_for_position_by_key(sb, &end_key, path) == + POSITION_FOUND) +- reiserfs_panic(sb, +- "PAP-14050: direct2indirect: " ++ reiserfs_panic(sb, "PAP-14050", + "direct item (%K) not found", &end_key); + p_le_ih = PATH_PITEM_HEAD(path); + RFALSE(!is_direct_le_ih(p_le_ih), +@@ -214,8 +213,7 @@ int indirect2direct(struct reiserfs_tran + /* re-search indirect item */ + if (search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) + == POSITION_NOT_FOUND) +- reiserfs_panic(p_s_sb, +- "PAP-5520: indirect2direct: " ++ reiserfs_panic(p_s_sb, "PAP-5520", + "item to be converted %K does not exist", + p_s_item_key); + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); +@@ -224,8 +222,8 @@ int 
indirect2direct(struct reiserfs_tran + (ih_item_len(&s_ih) / UNFM_P_SIZE - + 1) * p_s_sb->s_blocksize; + if (pos != pos1) +- reiserfs_panic(p_s_sb, "vs-5530: indirect2direct: " +- "tail position changed while we were reading it"); ++ reiserfs_panic(p_s_sb, "vs-5530", "tail position " ++ "changed while we were reading it"); + #endif + } + +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -86,11 +86,14 @@ void __reiserfs_warning(struct super_blo + /* assertions handling */ + + /** always check a condition and panic if it's false. */ +-#define __RASSERT( cond, scond, format, args... ) \ +-if( !( cond ) ) \ +- reiserfs_panic( NULL, "reiserfs[%i]: assertion " scond " failed at " \ +- __FILE__ ":%i:%s: " format "\n", \ +- in_interrupt() ? -1 : task_pid_nr(current), __LINE__ , __FUNCTION__ , ##args ) ++#define __RASSERT(cond, scond, format, args...) \ ++do { \ ++ if (!(cond)) \ ++ reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \ ++ __FILE__ ":%i:%s: " format "\n", \ ++ in_interrupt() ? -1 : task_pid_nr(current), \ ++ __LINE__, __func__ , ##args); \ ++} while (0) + + #define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args) + +@@ -1448,6 +1451,16 @@ struct buffer_info { + int bi_position; + }; + ++static inline struct super_block *sb_from_tb(struct tree_balance *tb) ++{ ++ return tb ? tb->tb_sb : NULL; ++} ++ ++static inline struct super_block *sb_from_bi(struct buffer_info *bi) ++{ ++ return bi ? sb_from_tb(bi->tb) : NULL; ++} ++ + /* there are 4 types of items: stat data, directory item, indirect, direct. + +-------------------+------------+--------------+------------+ + | | k_offset | k_uniqueness | mergeable? | +@@ -1988,8 +2001,11 @@ int fix_nodes(int n_op_mode, struct tree + void unfix_nodes(struct tree_balance *); + + /* prints.c */ +-void reiserfs_panic(struct super_block *s, const char *fmt, ...) ++void __reiserfs_panic(struct super_block *s, const char *id, ++ const char *function, const char *fmt, ...) 
+ __attribute__ ((noreturn)); ++#define reiserfs_panic(s, id, fmt, args...) \ ++ __reiserfs_panic(s, id, __func__, fmt, ##args) + void reiserfs_info(struct super_block *s, const char *fmt, ...); + void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...); + void print_indirect_item(struct buffer_head *bh, int item_num); diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-remove-i_has_xattr_dir.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-remove-i_has_xattr_dir.diff new file mode 100644 index 0000000000..f04a332ec7 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-remove-i_has_xattr_dir.diff @@ -0,0 +1,51 @@ +From: Jeff Mahoney +Subject: reiserfs: remove i_has_xattr_dir + + With the changes to xattr root locking, the i_has_xattr_dir flag + is no longer needed. This patch removes it. + +Signed-off-by: Jeff Mahoney + +--- + fs/reiserfs/xattr.c | 9 +-------- + 1 file changed, 1 insertion(+), 8 deletions(-) + +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -459,10 +459,7 @@ int reiserfs_delete_xattrs(struct inode + + dput(root); + out: +- if (!err) +- REISERFS_I(inode)->i_flags = +- REISERFS_I(inode)->i_flags & ~i_has_xattr_dir; +- else ++ if (err) + reiserfs_warning(inode->i_sb, "jdm-20004", + "Couldn't remove all xattrs (%d)\n", err); + return err; +@@ -660,7 +657,6 @@ reiserfs_xattr_set(struct inode *inode, + down_write(&REISERFS_I(inode)->i_xattr_sem); + + xahash = xattr_hash(buffer, buffer_size); +- REISERFS_I(inode)->i_flags |= i_has_xattr_dir; + + /* Resize it so we're ok to write there */ + newattrs.ia_size = buffer_size; +@@ -769,7 +765,6 @@ reiserfs_xattr_get(const struct inode *i + down_read(&REISERFS_I(inode)->i_xattr_sem); + + isize = i_size_read(dentry->d_inode); +- REISERFS_I(inode)->i_flags |= i_has_xattr_dir; + + /* Just return the size needed */ + if (buffer == NULL) { +@@ -999,8 +994,6 @@ ssize_t reiserfs_listxattr(struct dentry + buf.r_pos = 0; + buf.r_inode = dentry->d_inode; + +- 
REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir; +- + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); + err = xattr_readdir(dir->d_inode, reiserfs_listxattr_filler, &buf); + mutex_unlock(&dir->d_inode->i_mutex); diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-remove-link-detection.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-remove-link-detection.diff new file mode 100644 index 0000000000..3d4bbfc315 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-remove-link-detection.diff @@ -0,0 +1,42 @@ +From: Jeff Mahoney +Subject: reiserfs: remove link detection code + + Early in the reiserfs xattr development, there was a plan to use hardlinks + to save disk space for identical xattrs. That code never materialized and + isn't going to, so this patch removes the detection code. + +Signed-off-by: Jeff Mahoney + +--- + fs/reiserfs/xattr.c | 13 ------------- + 1 file changed, 13 deletions(-) + +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -432,7 +432,6 @@ reiserfs_xattr_set(struct inode *inode, + if (buffer && buffer_size) + xahash = xattr_hash(buffer, buffer_size); + +- open_file: + dentry = get_xa_file_dentry(inode, name, flags); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); +@@ -441,18 +440,6 @@ reiserfs_xattr_set(struct inode *inode, + + REISERFS_I(inode)->i_flags |= i_has_xattr_dir; + +- /* we need to copy it off.. 
*/ +- if (dentry->d_inode->i_nlink > 1) { +- dput(dentry); +- err = reiserfs_xattr_del(inode, name); +- if (err < 0) +- goto out; +- /* We just killed the old one, we're not replacing anymore */ +- if (flags & XATTR_REPLACE) +- flags &= ~XATTR_REPLACE; +- goto open_file; +- } +- + /* Resize it so we're ok to write there */ + newattrs.ia_size = buffer_size; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-._.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-._.diff new file mode 100644 index 0000000000..bfb241bb67 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-._.diff @@ -0,0 +1,1991 @@ +From: Jeff Mahoney +Subject: reiserfs: rename [cn]_* variables + + This patch renames n_, c_, etc variables to something more sane. This is + the sixth in a series of patches to rip out some of the awful variable + naming in reiserfs. + +Signed-off-by: Jeff Mahoney + +--- + + fs/reiserfs/file.c | 6 + fs/reiserfs/fix_node.c | 474 +++++++++++++++++++++--------------------- + fs/reiserfs/stree.c | 370 ++++++++++++++++---------------- + fs/reiserfs/tail_conversion.c | 30 +- + 4 files changed, 438 insertions(+), 442 deletions(-) + +--- a/fs/reiserfs/file.c ++++ b/fs/reiserfs/file.c +@@ -138,11 +138,11 @@ static int reiserfs_sync_file(struct fil + struct dentry *dentry, int datasync) + { + struct inode *inode = dentry->d_inode; +- int n_err; ++ int err; + int barrier_done; + + BUG_ON(!S_ISREG(inode->i_mode)); +- n_err = sync_mapping_buffers(inode->i_mapping); ++ err = sync_mapping_buffers(inode->i_mapping); + reiserfs_write_lock(inode->i_sb); + barrier_done = reiserfs_commit_for_inode(inode); + reiserfs_write_unlock(inode->i_sb); +@@ -150,7 +150,7 @@ static int reiserfs_sync_file(struct fil + blkdev_issue_flush(inode->i_sb->s_bdev, NULL); + if (barrier_done < 0) + return barrier_done; +- return (n_err < 0) ? -EIO : 0; ++ return (err < 0) ? 
-EIO : 0; + } + + /* taken fs/buffer.c:__block_commit_write */ +--- a/fs/reiserfs/fix_node.c ++++ b/fs/reiserfs/fix_node.c +@@ -751,24 +751,24 @@ else \ + + static void free_buffers_in_tb(struct tree_balance *tb) + { +- int n_counter; ++ int i; + + pathrelse(tb->tb_path); + +- for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) { +- brelse(tb->L[n_counter]); +- brelse(tb->R[n_counter]); +- brelse(tb->FL[n_counter]); +- brelse(tb->FR[n_counter]); +- brelse(tb->CFL[n_counter]); +- brelse(tb->CFR[n_counter]); +- +- tb->L[n_counter] = NULL; +- tb->R[n_counter] = NULL; +- tb->FL[n_counter] = NULL; +- tb->FR[n_counter] = NULL; +- tb->CFL[n_counter] = NULL; +- tb->CFR[n_counter] = NULL; ++ for (i = 0; i < MAX_HEIGHT; i++) { ++ brelse(tb->L[i]); ++ brelse(tb->R[i]); ++ brelse(tb->FL[i]); ++ brelse(tb->FR[i]); ++ brelse(tb->CFL[i]); ++ brelse(tb->CFR[i]); ++ ++ tb->L[i] = NULL; ++ tb->R[i] = NULL; ++ tb->FL[i] = NULL; ++ tb->FR[i] = NULL; ++ tb->CFL[i] = NULL; ++ tb->CFR[i] = NULL; + } + } + +@@ -778,13 +778,13 @@ static void free_buffers_in_tb(struct tr + * NO_DISK_SPACE - no disk space. + */ + /* The function is NOT SCHEDULE-SAFE! 
*/ +-static int get_empty_nodes(struct tree_balance *tb, int n_h) ++static int get_empty_nodes(struct tree_balance *tb, int h) + { + struct buffer_head *new_bh, +- *Sh = PATH_H_PBUFFER(tb->tb_path, n_h); +- b_blocknr_t *blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; +- int n_counter, n_number_of_freeblk, n_amount_needed, /* number of needed empty blocks */ +- n_retval = CARRY_ON; ++ *Sh = PATH_H_PBUFFER(tb->tb_path, h); ++ b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; ++ int counter, number_of_freeblk, amount_needed, /* number of needed empty blocks */ ++ retval = CARRY_ON; + struct super_block *sb = tb->tb_sb; + + /* number_of_freeblk is the number of empty blocks which have been +@@ -793,7 +793,7 @@ static int get_empty_nodes(struct tree_b + number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs + after empty blocks are acquired, and the balancing analysis is + then restarted, amount_needed is the number needed by this level +- (n_h) of the balancing analysis. ++ (h) of the balancing analysis. + + Note that for systems with many processes writing, it would be + more layout optimal to calculate the total number needed by all +@@ -801,31 +801,31 @@ static int get_empty_nodes(struct tree_b + + /* Initiate number_of_freeblk to the amount acquired prior to the restart of + the analysis or 0 if not restarted, then subtract the amount needed +- by all of the levels of the tree below n_h. */ +- /* blknum includes S[n_h], so we subtract 1 in this calculation */ +- for (n_counter = 0, n_number_of_freeblk = tb->cur_blknum; +- n_counter < n_h; n_counter++) +- n_number_of_freeblk -= +- (tb->blknum[n_counter]) ? (tb->blknum[n_counter] - ++ by all of the levels of the tree below h. */ ++ /* blknum includes S[h], so we subtract 1 in this calculation */ ++ for (counter = 0, number_of_freeblk = tb->cur_blknum; ++ counter < h; counter++) ++ number_of_freeblk -= ++ (tb->blknum[counter]) ? 
(tb->blknum[counter] - + 1) : 0; + + /* Allocate missing empty blocks. */ + /* if Sh == 0 then we are getting a new root */ +- n_amount_needed = (Sh) ? (tb->blknum[n_h] - 1) : 1; ++ amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1; + /* Amount_needed = the amount that we need more than the amount that we have. */ +- if (n_amount_needed > n_number_of_freeblk) +- n_amount_needed -= n_number_of_freeblk; ++ if (amount_needed > number_of_freeblk) ++ amount_needed -= number_of_freeblk; + else /* If we have enough already then there is nothing to do. */ + return CARRY_ON; + + /* No need to check quota - is not allocated for blocks used for formatted nodes */ +- if (reiserfs_new_form_blocknrs(tb, a_n_blocknrs, +- n_amount_needed) == NO_DISK_SPACE) ++ if (reiserfs_new_form_blocknrs(tb, blocknrs, ++ amount_needed) == NO_DISK_SPACE) + return NO_DISK_SPACE; + + /* for each blocknumber we just got, get a buffer and stick it on FEB */ +- for (blocknr = a_n_blocknrs, n_counter = 0; +- n_counter < n_amount_needed; blocknr++, n_counter++) { ++ for (blocknr = blocknrs, counter = 0; ++ counter < amount_needed; blocknr++, counter++) { + + RFALSE(!*blocknr, + "PAP-8135: reiserfs_new_blocknrs failed when got new blocks"); +@@ -845,10 +845,10 @@ static int get_empty_nodes(struct tree_b + tb->FEB[tb->cur_blknum++] = new_bh; + } + +- if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb)) +- n_retval = REPEAT_SEARCH; ++ if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb)) ++ retval = REPEAT_SEARCH; + +- return n_retval; ++ return retval; + } + + /* Get free space of the left neighbor, which is stored in the parent +@@ -896,36 +896,36 @@ static int get_rfree(struct tree_balance + } + + /* Check whether left neighbor is in memory. 
*/ +-static int is_left_neighbor_in_cache(struct tree_balance *tb, int n_h) ++static int is_left_neighbor_in_cache(struct tree_balance *tb, int h) + { + struct buffer_head *father, *left; + struct super_block *sb = tb->tb_sb; +- b_blocknr_t n_left_neighbor_blocknr; +- int n_left_neighbor_position; ++ b_blocknr_t left_neighbor_blocknr; ++ int left_neighbor_position; + + /* Father of the left neighbor does not exist. */ +- if (!tb->FL[n_h]) ++ if (!tb->FL[h]) + return 0; + + /* Calculate father of the node to be balanced. */ +- father = PATH_H_PBUFFER(tb->tb_path, n_h + 1); ++ father = PATH_H_PBUFFER(tb->tb_path, h + 1); + + RFALSE(!father || + !B_IS_IN_TREE(father) || +- !B_IS_IN_TREE(tb->FL[n_h]) || ++ !B_IS_IN_TREE(tb->FL[h]) || + !buffer_uptodate(father) || +- !buffer_uptodate(tb->FL[n_h]), ++ !buffer_uptodate(tb->FL[h]), + "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", +- father, tb->FL[n_h]); ++ father, tb->FL[h]); + + /* Get position of the pointer to the left neighbor into the left father. */ +- n_left_neighbor_position = (father == tb->FL[n_h]) ? +- tb->lkey[n_h] : B_NR_ITEMS(tb->FL[n_h]); ++ left_neighbor_position = (father == tb->FL[h]) ? ++ tb->lkey[h] : B_NR_ITEMS(tb->FL[h]); + /* Get left neighbor block number. */ +- n_left_neighbor_blocknr = +- B_N_CHILD_NUM(tb->FL[n_h], n_left_neighbor_position); ++ left_neighbor_blocknr = ++ B_N_CHILD_NUM(tb->FL[h], left_neighbor_position); + /* Look for the left neighbor in the cache. 
*/ +- if ((left = sb_find_get_block(sb, n_left_neighbor_blocknr))) { ++ if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) { + + RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left), + "vs-8170: left neighbor (%b %z) is not in the tree", +@@ -955,7 +955,7 @@ static void decrement_key(struct cpu_key + * CARRY_ON - schedule didn't occur while the function worked; + */ + static int get_far_parent(struct tree_balance *tb, +- int n_h, ++ int h, + struct buffer_head **pfather, + struct buffer_head **pcom_father, char c_lr_par) + { +@@ -963,38 +963,38 @@ static int get_far_parent(struct tree_ba + INITIALIZE_PATH(s_path_to_neighbor_father); + struct treepath *path = tb->tb_path; + struct cpu_key s_lr_father_key; +- int n_counter, +- n_position = INT_MAX, +- n_first_last_position = 0, +- n_path_offset = PATH_H_PATH_OFFSET(path, n_h); ++ int counter, ++ position = INT_MAX, ++ first_last_position = 0, ++ path_offset = PATH_H_PATH_OFFSET(path, h); + +- /* Starting from F[n_h] go upwards in the tree, and look for the common +- ancestor of F[n_h], and its neighbor l/r, that should be obtained. */ ++ /* Starting from F[h] go upwards in the tree, and look for the common ++ ancestor of F[h], and its neighbor l/r, that should be obtained. */ + +- n_counter = n_path_offset; ++ counter = path_offset; + +- RFALSE(n_counter < FIRST_PATH_ELEMENT_OFFSET, ++ RFALSE(counter < FIRST_PATH_ELEMENT_OFFSET, + "PAP-8180: invalid path length"); + +- for (; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--) { ++ for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) { + /* Check whether parent of the current buffer in the path is really parent in the tree. */ + if (!B_IS_IN_TREE +- (parent = PATH_OFFSET_PBUFFER(path, n_counter - 1))) ++ (parent = PATH_OFFSET_PBUFFER(path, counter - 1))) + return REPEAT_SEARCH; + /* Check whether position in the parent is correct. 
*/ +- if ((n_position = ++ if ((position = + PATH_OFFSET_POSITION(path, +- n_counter - 1)) > ++ counter - 1)) > + B_NR_ITEMS(parent)) + return REPEAT_SEARCH; + /* Check whether parent at the path really points to the child. */ +- if (B_N_CHILD_NUM(parent, n_position) != +- PATH_OFFSET_PBUFFER(path, n_counter)->b_blocknr) ++ if (B_N_CHILD_NUM(parent, position) != ++ PATH_OFFSET_PBUFFER(path, counter)->b_blocknr) + return REPEAT_SEARCH; + /* Return delimiting key if position in the parent is not equal to first/last one. */ + if (c_lr_par == RIGHT_PARENTS) +- n_first_last_position = B_NR_ITEMS(parent); +- if (n_position != n_first_last_position) { ++ first_last_position = B_NR_ITEMS(parent); ++ if (position != first_last_position) { + *pcom_father = parent; + get_bh(*pcom_father); + /*(*pcom_father = parent)->b_count++; */ +@@ -1003,7 +1003,7 @@ static int get_far_parent(struct tree_ba + } + + /* if we are in the root of the tree, then there is no common father */ +- if (n_counter == FIRST_PATH_ELEMENT_OFFSET) { ++ if (counter == FIRST_PATH_ELEMENT_OFFSET) { + /* Check whether first buffer in the path is the root of the tree. */ + if (PATH_OFFSET_PBUFFER + (tb->tb_path, +@@ -1036,18 +1036,18 @@ static int get_far_parent(struct tree_ba + le_key2cpu_key(&s_lr_father_key, + B_N_PDELIM_KEY(*pcom_father, + (c_lr_par == +- LEFT_PARENTS) ? (tb->lkey[n_h - 1] = +- n_position - +- 1) : (tb->rkey[n_h - ++ LEFT_PARENTS) ? 
(tb->lkey[h - 1] = ++ position - ++ 1) : (tb->rkey[h - + 1] = +- n_position))); ++ position))); + + if (c_lr_par == LEFT_PARENTS) + decrement_key(&s_lr_father_key); + + if (search_by_key + (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, +- n_h + 1) == IO_ERROR) ++ h + 1) == IO_ERROR) + // path is released + return IO_ERROR; + +@@ -1059,7 +1059,7 @@ static int get_far_parent(struct tree_ba + + *pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); + +- RFALSE(B_LEVEL(*pfather) != n_h + 1, ++ RFALSE(B_LEVEL(*pfather) != h + 1, + "PAP-8190: (%b %z) level too small", *pfather, *pfather); + RFALSE(s_path_to_neighbor_father.path_length < + FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small"); +@@ -1069,92 +1069,92 @@ static int get_far_parent(struct tree_ba + return CARRY_ON; + } + +-/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of +- * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset], +- * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset]. +- * Calculate numbers of left and right delimiting keys position: lkey[n_path_offset], rkey[n_path_offset]. ++/* Get parents of neighbors of node in the path(S[path_offset]) and common parents of ++ * S[path_offset] and L[path_offset]/R[path_offset]: F[path_offset], FL[path_offset], ++ * FR[path_offset], CFL[path_offset], CFR[path_offset]. ++ * Calculate numbers of left and right delimiting keys position: lkey[path_offset], rkey[path_offset]. 
+ * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + */ +-static int get_parents(struct tree_balance *tb, int n_h) ++static int get_parents(struct tree_balance *tb, int h) + { + struct treepath *path = tb->tb_path; +- int n_position, +- n_ret_value, +- n_path_offset = PATH_H_PATH_OFFSET(tb->tb_path, n_h); ++ int position, ++ ret, ++ path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h); + struct buffer_head *curf, *curcf; + + /* Current node is the root of the tree or will be root of the tree */ +- if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) { ++ if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { + /* The root can not have parents. + Release nodes which previously were obtained as parents of the current node neighbors. */ +- brelse(tb->FL[n_h]); +- brelse(tb->CFL[n_h]); +- brelse(tb->FR[n_h]); +- brelse(tb->CFR[n_h]); +- tb->FL[n_h] = NULL; +- tb->CFL[n_h] = NULL; +- tb->FR[n_h] = NULL; +- tb->CFR[n_h] = NULL; ++ brelse(tb->FL[h]); ++ brelse(tb->CFL[h]); ++ brelse(tb->FR[h]); ++ brelse(tb->CFR[h]); ++ tb->FL[h] = NULL; ++ tb->CFL[h] = NULL; ++ tb->FR[h] = NULL; ++ tb->CFR[h] = NULL; + return CARRY_ON; + } + +- /* Get parent FL[n_path_offset] of L[n_path_offset]. */ +- n_position = PATH_OFFSET_POSITION(path, n_path_offset - 1); +- if (n_position) { ++ /* Get parent FL[path_offset] of L[path_offset]. */ ++ position = PATH_OFFSET_POSITION(path, path_offset - 1); ++ if (position) { + /* Current node is not the first child of its parent. */ +- curf = PATH_OFFSET_PBUFFER(path, n_path_offset - 1); +- curcf = PATH_OFFSET_PBUFFER(path, n_path_offset - 1); ++ curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); ++ curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); + get_bh(curf); + get_bh(curf); +- tb->lkey[n_h] = n_position - 1; ++ tb->lkey[h] = position - 1; + } else { +- /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node. 
+- Calculate current common parent of L[n_path_offset] and the current node. Note that +- CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset]. +- Calculate lkey[n_path_offset]. */ +- if ((n_ret_value = get_far_parent(tb, n_h + 1, &curf, ++ /* Calculate current parent of L[path_offset], which is the left neighbor of the current node. ++ Calculate current common parent of L[path_offset] and the current node. Note that ++ CFL[path_offset] not equal FL[path_offset] and CFL[path_offset] not equal F[path_offset]. ++ Calculate lkey[path_offset]. */ ++ if ((ret = get_far_parent(tb, h + 1, &curf, + &curcf, + LEFT_PARENTS)) != CARRY_ON) +- return n_ret_value; ++ return ret; + } + +- brelse(tb->FL[n_h]); +- tb->FL[n_h] = curf; /* New initialization of FL[n_h]. */ +- brelse(tb->CFL[n_h]); +- tb->CFL[n_h] = curcf; /* New initialization of CFL[n_h]. */ ++ brelse(tb->FL[h]); ++ tb->FL[h] = curf; /* New initialization of FL[h]. */ ++ brelse(tb->CFL[h]); ++ tb->CFL[h] = curcf; /* New initialization of CFL[h]. */ + + RFALSE((curf && !B_IS_IN_TREE(curf)) || + (curcf && !B_IS_IN_TREE(curcf)), + "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf); + +-/* Get parent FR[n_h] of R[n_h]. */ ++/* Get parent FR[h] of R[h]. */ + +-/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */ +- if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(path, n_h + 1))) { +-/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h]. +- Calculate current common parent of R[n_h] and current node. Note that CFR[n_h] +- not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */ +- if ((n_ret_value = +- get_far_parent(tb, n_h + 1, &curf, &curcf, ++/* Current node is the last child of F[h]. FR[h] != F[h]. */ ++ if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) { ++/* Calculate current parent of R[h], which is the right neighbor of F[h]. ++ Calculate current common parent of R[h] and current node. 
Note that CFR[h] ++ not equal FR[path_offset] and CFR[h] not equal F[h]. */ ++ if ((ret = ++ get_far_parent(tb, h + 1, &curf, &curcf, + RIGHT_PARENTS)) != CARRY_ON) +- return n_ret_value; ++ return ret; + } else { +-/* Current node is not the last child of its parent F[n_h]. */ +- curf = PATH_OFFSET_PBUFFER(path, n_path_offset - 1); +- curcf = PATH_OFFSET_PBUFFER(path, n_path_offset - 1); ++/* Current node is not the last child of its parent F[h]. */ ++ curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); ++ curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); + get_bh(curf); + get_bh(curf); +- tb->rkey[n_h] = n_position; ++ tb->rkey[h] = position; + } + +- brelse(tb->FR[n_h]); +- /* New initialization of FR[n_path_offset]. */ +- tb->FR[n_h] = curf; ++ brelse(tb->FR[h]); ++ /* New initialization of FR[path_offset]. */ ++ tb->FR[h] = curf; + +- brelse(tb->CFR[n_h]); +- /* New initialization of CFR[n_path_offset]. */ +- tb->CFR[n_h] = curcf; ++ brelse(tb->CFR[h]); ++ /* New initialization of CFR[path_offset]. */ ++ tb->CFR[h] = curcf; + + RFALSE((curf && !B_IS_IN_TREE(curf)) || + (curcf && !B_IS_IN_TREE(curcf)), +@@ -1222,7 +1222,7 @@ static int ip_check_balance(struct tree_ + contains node being balanced. The mnemonic is + that the attempted change in node space used level + is levbytes bytes. 
*/ +- n_ret_value; ++ ret; + + int lfree, sfree, rfree /* free space in L, S and R */ ; + +@@ -1262,22 +1262,22 @@ static int ip_check_balance(struct tree_ + if (!h) + reiserfs_panic(tb->tb_sb, "vs-8210", + "S[0] can not be 0"); +- switch (n_ret_value = get_empty_nodes(tb, h)) { ++ switch (ret = get_empty_nodes(tb, h)) { + case CARRY_ON: + set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ + + case NO_DISK_SPACE: + case REPEAT_SEARCH: +- return n_ret_value; ++ return ret; + default: + reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect " + "return value of get_empty_nodes"); + } + } + +- if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */ +- return n_ret_value; ++ if ((ret = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */ ++ return ret; + + sfree = B_FREE_SPACE(Sh); + +@@ -1564,7 +1564,7 @@ static int dc_check_balance_internal(str + /* Sh is the node whose balance is currently being checked, + and Fh is its father. */ + struct buffer_head *Sh, *Fh; +- int maxsize, n_ret_value; ++ int maxsize, ret; + int lfree, rfree /* free space in L and R */ ; + + Sh = PATH_H_PBUFFER(tb->tb_path, h); +@@ -1589,8 +1589,8 @@ static int dc_check_balance_internal(str + return CARRY_ON; + } + +- if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) +- return n_ret_value; ++ if ((ret = get_parents(tb, h)) != CARRY_ON) ++ return ret; + + /* get free space of neighbors */ + rfree = get_rfree(tb, h); +@@ -1747,7 +1747,7 @@ static int dc_check_balance_leaf(struct + attempted change in node space used level is levbytes bytes. */ + int levbytes; + /* the maximal item size */ +- int maxsize, n_ret_value; ++ int maxsize, ret; + /* S0 is the node whose balance is currently being checked, + and F0 is its father. 
*/ + struct buffer_head *S0, *F0; +@@ -1769,8 +1769,8 @@ static int dc_check_balance_leaf(struct + return NO_BALANCING_NEEDED; + } + +- if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) +- return n_ret_value; ++ if ((ret = get_parents(tb, h)) != CARRY_ON) ++ return ret; + + /* get free space of neighbors */ + rfree = get_rfree(tb, h); +@@ -1889,40 +1889,40 @@ static int check_balance(int mode, + } + + /* Check whether parent at the path is the really parent of the current node.*/ +-static int get_direct_parent(struct tree_balance *tb, int n_h) ++static int get_direct_parent(struct tree_balance *tb, int h) + { + struct buffer_head *bh; + struct treepath *path = tb->tb_path; +- int n_position, +- n_path_offset = PATH_H_PATH_OFFSET(tb->tb_path, n_h); ++ int position, ++ path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h); + + /* We are in the root or in the new root. */ +- if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) { ++ if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { + +- RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1, ++ RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1, + "PAP-8260: invalid offset in the path"); + + if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)-> + b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) { + /* Root is not changed. */ +- PATH_OFFSET_PBUFFER(path, n_path_offset - 1) = NULL; +- PATH_OFFSET_POSITION(path, n_path_offset - 1) = 0; ++ PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL; ++ PATH_OFFSET_POSITION(path, path_offset - 1) = 0; + return CARRY_ON; + } + return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ + } + + if (!B_IS_IN_TREE +- (bh = PATH_OFFSET_PBUFFER(path, n_path_offset - 1))) ++ (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1))) + return REPEAT_SEARCH; /* Parent in the path is not in the tree. 
*/ + +- if ((n_position = ++ if ((position = + PATH_OFFSET_POSITION(path, +- n_path_offset - 1)) > B_NR_ITEMS(bh)) ++ path_offset - 1)) > B_NR_ITEMS(bh)) + return REPEAT_SEARCH; + +- if (B_N_CHILD_NUM(bh, n_position) != +- PATH_OFFSET_PBUFFER(path, n_path_offset)->b_blocknr) ++ if (B_N_CHILD_NUM(bh, position) != ++ PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr) + /* Parent in the path is not parent of the current node in the tree. */ + return REPEAT_SEARCH; + +@@ -1935,92 +1935,92 @@ static int get_direct_parent(struct tree + return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */ + } + +-/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors +- * of S[n_h] we +- * need in order to balance S[n_h], and get them if necessary. ++/* Using lnum[h] and rnum[h] we should determine what neighbors ++ * of S[h] we ++ * need in order to balance S[h], and get them if necessary. + * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + */ +-static int get_neighbors(struct tree_balance *tb, int n_h) ++static int get_neighbors(struct tree_balance *tb, int h) + { +- int n_child_position, +- n_path_offset = PATH_H_PATH_OFFSET(tb->tb_path, n_h + 1); +- unsigned long n_son_number; ++ int child_position, ++ path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1); ++ unsigned long son_number; + struct super_block *sb = tb->tb_sb; + struct buffer_head *bh; + +- PROC_INFO_INC(sb, get_neighbors[n_h]); ++ PROC_INFO_INC(sb, get_neighbors[h]); + +- if (tb->lnum[n_h]) { +- /* We need left neighbor to balance S[n_h]. */ +- PROC_INFO_INC(sb, need_l_neighbor[n_h]); +- bh = PATH_OFFSET_PBUFFER(tb->tb_path, n_path_offset); ++ if (tb->lnum[h]) { ++ /* We need left neighbor to balance S[h]. 
*/ ++ PROC_INFO_INC(sb, need_l_neighbor[h]); ++ bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); + +- RFALSE(bh == tb->FL[n_h] && +- !PATH_OFFSET_POSITION(tb->tb_path, n_path_offset), ++ RFALSE(bh == tb->FL[h] && ++ !PATH_OFFSET_POSITION(tb->tb_path, path_offset), + "PAP-8270: invalid position in the parent"); + +- n_child_position = ++ child_position = + (bh == +- tb->FL[n_h]) ? tb->lkey[n_h] : B_NR_ITEMS(tb-> +- FL[n_h]); +- n_son_number = B_N_CHILD_NUM(tb->FL[n_h], n_child_position); +- bh = sb_bread(sb, n_son_number); ++ tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb-> ++ FL[h]); ++ son_number = B_N_CHILD_NUM(tb->FL[h], child_position); ++ bh = sb_bread(sb, son_number); + if (!bh) + return IO_ERROR; + if (FILESYSTEM_CHANGED_TB(tb)) { + brelse(bh); +- PROC_INFO_INC(sb, get_neighbors_restart[n_h]); ++ PROC_INFO_INC(sb, get_neighbors_restart[h]); + return REPEAT_SEARCH; + } + +- RFALSE(!B_IS_IN_TREE(tb->FL[n_h]) || +- n_child_position > B_NR_ITEMS(tb->FL[n_h]) || +- B_N_CHILD_NUM(tb->FL[n_h], n_child_position) != ++ RFALSE(!B_IS_IN_TREE(tb->FL[h]) || ++ child_position > B_NR_ITEMS(tb->FL[h]) || ++ B_N_CHILD_NUM(tb->FL[h], child_position) != + bh->b_blocknr, "PAP-8275: invalid parent"); + RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child"); +- RFALSE(!n_h && ++ RFALSE(!h && + B_FREE_SPACE(bh) != + MAX_CHILD_SIZE(bh) - +- dc_size(B_N_CHILD(tb->FL[0], n_child_position)), ++ dc_size(B_N_CHILD(tb->FL[0], child_position)), + "PAP-8290: invalid child size of left neighbor"); + +- brelse(tb->L[n_h]); +- tb->L[n_h] = bh; ++ brelse(tb->L[h]); ++ tb->L[h] = bh; + } + +- /* We need right neighbor to balance S[n_path_offset]. */ +- if (tb->rnum[n_h]) { +- PROC_INFO_INC(sb, need_r_neighbor[n_h]); +- bh = PATH_OFFSET_PBUFFER(tb->tb_path, n_path_offset); ++ /* We need right neighbor to balance S[path_offset]. */ ++ if (tb->rnum[h]) { /* We need right neighbor to balance S[path_offset]. 
*/ ++ PROC_INFO_INC(sb, need_r_neighbor[h]); ++ bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); + +- RFALSE(bh == tb->FR[n_h] && ++ RFALSE(bh == tb->FR[h] && + PATH_OFFSET_POSITION(tb->tb_path, +- n_path_offset) >= ++ path_offset) >= + B_NR_ITEMS(bh), + "PAP-8295: invalid position in the parent"); + +- n_child_position = +- (bh == tb->FR[n_h]) ? tb->rkey[n_h] + 1 : 0; +- n_son_number = B_N_CHILD_NUM(tb->FR[n_h], n_child_position); +- bh = sb_bread(sb, n_son_number); ++ child_position = ++ (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0; ++ son_number = B_N_CHILD_NUM(tb->FR[h], child_position); ++ bh = sb_bread(sb, son_number); + if (!bh) + return IO_ERROR; + if (FILESYSTEM_CHANGED_TB(tb)) { + brelse(bh); +- PROC_INFO_INC(sb, get_neighbors_restart[n_h]); ++ PROC_INFO_INC(sb, get_neighbors_restart[h]); + return REPEAT_SEARCH; + } +- brelse(tb->R[n_h]); +- tb->R[n_h] = bh; ++ brelse(tb->R[h]); ++ tb->R[h] = bh; + +- RFALSE(!n_h ++ RFALSE(!h + && B_FREE_SPACE(bh) != + MAX_CHILD_SIZE(bh) - +- dc_size(B_N_CHILD(tb->FR[0], n_child_position)), ++ dc_size(B_N_CHILD(tb->FR[0], child_position)), + "PAP-8300: invalid child size of right neighbor (%d != %d - %d)", + B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh), +- dc_size(B_N_CHILD(tb->FR[0], n_child_position))); ++ dc_size(B_N_CHILD(tb->FR[0], child_position))); + + } + return CARRY_ON; +@@ -2317,11 +2317,11 @@ static int wait_tb_buffers_until_unlocke + * -1 - if no_disk_space + */ + +-int fix_nodes(int n_op_mode, struct tree_balance *tb, ++int fix_nodes(int op_mode, struct tree_balance *tb, + struct item_head *ins_ih, const void *data) + { +- int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(tb->tb_path); +- int n_pos_in_item; ++ int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path); ++ int pos_in_item; + + /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared + ** during wait_tb_buffers_run +@@ -2331,7 +2331,7 @@ int fix_nodes(int n_op_mode, struct tree + + ++REISERFS_SB(tb->tb_sb)->s_fix_nodes; + +- 
n_pos_in_item = tb->tb_path->pos_in_item; ++ pos_in_item = tb->tb_path->pos_in_item; + + tb->fs_gen = get_generation(tb->tb_sb); + +@@ -2364,26 +2364,26 @@ int fix_nodes(int n_op_mode, struct tree + reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is " + "not uptodate at the beginning of fix_nodes " + "or not in tree (mode %c)", +- tbS0, tbS0, n_op_mode); ++ tbS0, tbS0, op_mode); + + /* Check parameters. */ +- switch (n_op_mode) { ++ switch (op_mode) { + case M_INSERT: +- if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(tbS0)) ++ if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0)) + reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect " + "item number %d (in S0 - %d) in case " +- "of insert", n_item_num, ++ "of insert", item_num, + B_NR_ITEMS(tbS0)); + break; + case M_PASTE: + case M_DELETE: + case M_CUT: +- if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(tbS0)) { ++ if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) { + print_block(tbS0, 0, -1, -1); + reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect " + "item number(%d); mode = %c " + "insert_size = %d", +- n_item_num, n_op_mode, ++ item_num, op_mode, + tb->insert_size[0]); + } + break; +@@ -2397,73 +2397,73 @@ int fix_nodes(int n_op_mode, struct tree + // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat + return REPEAT_SEARCH; + +- /* Starting from the leaf level; for all levels n_h of the tree. */ +- for (n_h = 0; n_h < MAX_HEIGHT && tb->insert_size[n_h]; n_h++) { +- n_ret_value = get_direct_parent(tb, n_h); +- if (n_ret_value != CARRY_ON) ++ /* Starting from the leaf level; for all levels h of the tree. 
*/ ++ for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) { ++ ret = get_direct_parent(tb, h); ++ if (ret != CARRY_ON) + goto repeat; + +- n_ret_value = check_balance(n_op_mode, tb, n_h, n_item_num, +- n_pos_in_item, ins_ih, data); +- if (n_ret_value != CARRY_ON) { +- if (n_ret_value == NO_BALANCING_NEEDED) { ++ ret = check_balance(op_mode, tb, h, item_num, ++ pos_in_item, ins_ih, data); ++ if (ret != CARRY_ON) { ++ if (ret == NO_BALANCING_NEEDED) { + /* No balancing for higher levels needed. */ +- n_ret_value = get_neighbors(tb, n_h); +- if (n_ret_value != CARRY_ON) ++ ret = get_neighbors(tb, h); ++ if (ret != CARRY_ON) + goto repeat; +- if (n_h != MAX_HEIGHT - 1) +- tb->insert_size[n_h + 1] = 0; ++ if (h != MAX_HEIGHT - 1) ++ tb->insert_size[h + 1] = 0; + /* ok, analysis and resource gathering are complete */ + break; + } + goto repeat; + } + +- n_ret_value = get_neighbors(tb, n_h); +- if (n_ret_value != CARRY_ON) ++ ret = get_neighbors(tb, h); ++ if (ret != CARRY_ON) + goto repeat; + + /* No disk space, or schedule occurred and analysis may be + * invalid and needs to be redone. */ +- n_ret_value = get_empty_nodes(tb, n_h); +- if (n_ret_value != CARRY_ON) ++ ret = get_empty_nodes(tb, h); ++ if (ret != CARRY_ON) + goto repeat; + +- if (!PATH_H_PBUFFER(tb->tb_path, n_h)) { ++ if (!PATH_H_PBUFFER(tb->tb_path, h)) { + /* We have a positive insert size but no nodes exist on this + level, this means that we are creating a new root. 
*/ + +- RFALSE(tb->blknum[n_h] != 1, ++ RFALSE(tb->blknum[h] != 1, + "PAP-8350: creating new empty root"); + +- if (n_h < MAX_HEIGHT - 1) +- tb->insert_size[n_h + 1] = 0; +- } else if (!PATH_H_PBUFFER(tb->tb_path, n_h + 1)) { +- if (tb->blknum[n_h] > 1) { +- /* The tree needs to be grown, so this node S[n_h] ++ if (h < MAX_HEIGHT - 1) ++ tb->insert_size[h + 1] = 0; ++ } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) { ++ if (tb->blknum[h] > 1) { ++ /* The tree needs to be grown, so this node S[h] + which is the root node is split into two nodes, +- and a new node (S[n_h+1]) will be created to ++ and a new node (S[h+1]) will be created to + become the root node. */ + +- RFALSE(n_h == MAX_HEIGHT - 1, ++ RFALSE(h == MAX_HEIGHT - 1, + "PAP-8355: attempt to create too high of a tree"); + +- tb->insert_size[n_h + 1] = ++ tb->insert_size[h + 1] = + (DC_SIZE + +- KEY_SIZE) * (tb->blknum[n_h] - 1) + ++ KEY_SIZE) * (tb->blknum[h] - 1) + + DC_SIZE; +- } else if (n_h < MAX_HEIGHT - 1) +- tb->insert_size[n_h + 1] = 0; ++ } else if (h < MAX_HEIGHT - 1) ++ tb->insert_size[h + 1] = 0; + } else +- tb->insert_size[n_h + 1] = +- (DC_SIZE + KEY_SIZE) * (tb->blknum[n_h] - 1); ++ tb->insert_size[h + 1] = ++ (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1); + } + +- n_ret_value = wait_tb_buffers_until_unlocked(tb); +- if (n_ret_value == CARRY_ON) { ++ ret = wait_tb_buffers_until_unlocked(tb); ++ if (ret == CARRY_ON) { + if (FILESYSTEM_CHANGED_TB(tb)) { + wait_tb_buffers_run = 1; +- n_ret_value = REPEAT_SEARCH; ++ ret = REPEAT_SEARCH; + goto repeat; + } else { + return CARRY_ON; +@@ -2529,7 +2529,7 @@ int fix_nodes(int n_op_mode, struct tree + (tb->tb_sb, tb->FEB[i]); + } + } +- return n_ret_value; ++ return ret; + } + + } +--- a/fs/reiserfs/stree.c ++++ b/fs/reiserfs/stree.c +@@ -136,11 +136,11 @@ inline int comp_short_le_keys(const stru + const struct reiserfs_key *key2) + { + __u32 *k1_u32, *k2_u32; +- int n_key_length = REISERFS_SHORT_KEY_LEN; ++ int key_length = REISERFS_SHORT_KEY_LEN; + + 
k1_u32 = (__u32 *) key1; + k2_u32 = (__u32 *) key2; +- for (; n_key_length--; ++k1_u32, ++k2_u32) { ++ for (; key_length--; ++k1_u32, ++k2_u32) { + if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32)) + return -1; + if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32)) +@@ -177,10 +177,10 @@ inline int comp_le_keys(const struct rei + * *pos = number of the searched element if found, else the * + * number of the first element that is larger than key. * + **************************************************************************/ +-/* For those not familiar with binary search: n_lbound is the leftmost item that it +- could be, n_rbound the rightmost item that it could be. We examine the item +- halfway between n_lbound and n_rbound, and that tells us either that we can increase +- n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound <= n_rbound that ++/* For those not familiar with binary search: lbound is the leftmost item that it ++ could be, rbound the rightmost item that it could be. We examine the item ++ halfway between lbound and rbound, and that tells us either that we can increase ++ lbound, or decrease rbound, or that we have found it, or if lbound <= rbound that + there are no possible items, and we have not found it. With each examination we + cut the number of possible items it could be by one more than half rounded down, + or we find it. */ +@@ -198,28 +198,27 @@ static inline int bin_search(const void + int *pos /* Number of the searched for element. 
*/ + ) + { +- int n_rbound, n_lbound, n_j; ++ int rbound, lbound, j; + +- for (n_j = ((n_rbound = num - 1) + (n_lbound = 0)) / 2; +- n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2) ++ for (j = ((rbound = num - 1) + (lbound = 0)) / 2; ++ lbound <= rbound; j = (rbound + lbound) / 2) + switch (comp_keys +- ((struct reiserfs_key *)((char *)base + +- n_j * width), ++ ((struct reiserfs_key *)((char *)base + j * width), + (struct cpu_key *)key)) { + case -1: +- n_lbound = n_j + 1; ++ lbound = j + 1; + continue; + case 1: +- n_rbound = n_j - 1; ++ rbound = j - 1; + continue; + case 0: +- *pos = n_j; ++ *pos = j; + return ITEM_FOUND; /* Key found in the array. */ + } + + /* bin_search did not find given key, it returns position of key, + that is minimal and greater than the given one. */ +- *pos = n_lbound; ++ *pos = lbound; + return ITEM_NOT_FOUND; + } + +@@ -242,43 +241,41 @@ static const struct reiserfs_key MAX_KEY + of the path, and going upwards. We must check the path's validity at each step. If the key is not in + the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this + case we return a special key, either MIN_KEY or MAX_KEY. */ +-static inline const struct reiserfs_key *get_lkey(const struct treepath +- *chk_path, +- const struct super_block +- *sb) ++static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path, ++ const struct super_block *sb) + { +- int n_position, n_path_offset = chk_path->path_length; ++ int position, path_offset = chk_path->path_length; + struct buffer_head *parent; + +- RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET, ++ RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET, + "PAP-5010: invalid offset in the path"); + + /* While not higher in path than first element. 
*/ +- while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { ++ while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { + + RFALSE(!buffer_uptodate +- (PATH_OFFSET_PBUFFER(chk_path, n_path_offset)), ++ (PATH_OFFSET_PBUFFER(chk_path, path_offset)), + "PAP-5020: parent is not uptodate"); + + /* Parent at the path is not in the tree now. */ + if (!B_IS_IN_TREE + (parent = +- PATH_OFFSET_PBUFFER(chk_path, n_path_offset))) ++ PATH_OFFSET_PBUFFER(chk_path, path_offset))) + return &MAX_KEY; + /* Check whether position in the parent is correct. */ +- if ((n_position = ++ if ((position = + PATH_OFFSET_POSITION(chk_path, +- n_path_offset)) > ++ path_offset)) > + B_NR_ITEMS(parent)) + return &MAX_KEY; + /* Check whether parent at the path really points to the child. */ +- if (B_N_CHILD_NUM(parent, n_position) != ++ if (B_N_CHILD_NUM(parent, position) != + PATH_OFFSET_PBUFFER(chk_path, +- n_path_offset + 1)->b_blocknr) ++ path_offset + 1)->b_blocknr) + return &MAX_KEY; + /* Return delimiting key if position in the parent is not equal to zero. */ +- if (n_position) +- return B_N_PDELIM_KEY(parent, n_position - 1); ++ if (position) ++ return B_N_PDELIM_KEY(parent, position - 1); + } + /* Return MIN_KEY if we are in the root of the buffer tree. 
*/ + if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> +@@ -291,37 +288,37 @@ static inline const struct reiserfs_key + inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path, + const struct super_block *sb) + { +- int n_position, n_path_offset = chk_path->path_length; ++ int position, path_offset = chk_path->path_length; + struct buffer_head *parent; + +- RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET, ++ RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET, + "PAP-5030: invalid offset in the path"); + +- while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { ++ while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { + + RFALSE(!buffer_uptodate +- (PATH_OFFSET_PBUFFER(chk_path, n_path_offset)), ++ (PATH_OFFSET_PBUFFER(chk_path, path_offset)), + "PAP-5040: parent is not uptodate"); + + /* Parent at the path is not in the tree now. */ + if (!B_IS_IN_TREE + (parent = +- PATH_OFFSET_PBUFFER(chk_path, n_path_offset))) ++ PATH_OFFSET_PBUFFER(chk_path, path_offset))) + return &MIN_KEY; + /* Check whether position in the parent is correct. */ +- if ((n_position = ++ if ((position = + PATH_OFFSET_POSITION(chk_path, +- n_path_offset)) > ++ path_offset)) > + B_NR_ITEMS(parent)) + return &MIN_KEY; + /* Check whether parent at the path really points to the child. */ +- if (B_N_CHILD_NUM(parent, n_position) != ++ if (B_N_CHILD_NUM(parent, position) != + PATH_OFFSET_PBUFFER(chk_path, +- n_path_offset + 1)->b_blocknr) ++ path_offset + 1)->b_blocknr) + return &MIN_KEY; + /* Return delimiting key if position in the parent is not the last one. */ +- if (n_position != B_NR_ITEMS(parent)) +- return B_N_PDELIM_KEY(parent, n_position); ++ if (position != B_NR_ITEMS(parent)) ++ return B_N_PDELIM_KEY(parent, position); + } + /* Return MAX_KEY if we are in the root of the buffer tree. 
*/ + if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> +@@ -371,14 +368,14 @@ int reiserfs_check_path(struct treepath + void pathrelse_and_restore(struct super_block *sb, + struct treepath *search_path) + { +- int n_path_offset = search_path->path_length; ++ int path_offset = search_path->path_length; + +- RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, ++ RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, + "clm-4000: invalid path offset"); + +- while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { ++ while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { + struct buffer_head *bh; +- bh = PATH_OFFSET_PBUFFER(search_path, n_path_offset--); ++ bh = PATH_OFFSET_PBUFFER(search_path, path_offset--); + reiserfs_restore_prepared_buffer(sb, bh); + brelse(bh); + } +@@ -388,13 +385,13 @@ void pathrelse_and_restore(struct super_ + /* Drop the reference to each buffer in a path */ + void pathrelse(struct treepath *search_path) + { +- int n_path_offset = search_path->path_length; ++ int path_offset = search_path->path_length; + +- RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, ++ RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, + "PAP-5090: invalid path offset"); + +- while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) +- brelse(PATH_OFFSET_PBUFFER(search_path, n_path_offset--)); ++ while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) ++ brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--)); + + search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; + } +@@ -572,16 +569,16 @@ int search_by_key(struct super_block *sb + by the calling + function. It is filled up + by this function. */ +- int n_stop_level /* How far down the tree to search. To ++ int stop_level /* How far down the tree to search. 
To + stop at leaf level - set to + DISK_LEAF_NODE_LEVEL */ + ) + { +- b_blocknr_t n_block_number; ++ b_blocknr_t block_number; + int expected_level; + struct buffer_head *bh; + struct path_element *last_element; +- int n_node_level, n_retval; ++ int node_level, retval; + int right_neighbor_of_leaf_node; + int fs_gen; + struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; +@@ -589,7 +586,7 @@ int search_by_key(struct super_block *sb + int reada_count = 0; + + #ifdef CONFIG_REISERFS_CHECK +- int n_repeat_counter = 0; ++ int repeat_counter = 0; + #endif + + PROC_INFO_INC(sb, search_by_key); +@@ -605,16 +602,16 @@ int search_by_key(struct super_block *sb + /* With each iteration of this loop we search through the items in the + current node, and calculate the next current node(next path element) + for the next iteration of this loop.. */ +- n_block_number = SB_ROOT_BLOCK(sb); ++ block_number = SB_ROOT_BLOCK(sb); + expected_level = -1; + while (1) { + + #ifdef CONFIG_REISERFS_CHECK +- if (!(++n_repeat_counter % 50000)) ++ if (!(++repeat_counter % 50000)) + reiserfs_warning(sb, "PAP-5100", + "%s: there were %d iterations of " + "while loop looking for key %K", +- current->comm, n_repeat_counter, ++ current->comm, repeat_counter, + key); + #endif + +@@ -627,7 +624,7 @@ int search_by_key(struct super_block *sb + /* Read the next tree node, and set the last element in the path to + have a pointer to it. */ + if ((bh = last_element->pe_buffer = +- sb_getblk(sb, n_block_number))) { ++ sb_getblk(sb, block_number))) { + if (!buffer_uptodate(bh) && reada_count > 1) + search_by_key_reada(sb, reada_bh, + reada_blocks, reada_count); +@@ -661,7 +658,7 @@ int search_by_key(struct super_block *sb + + /* Get the root block number so that we can repeat the search + starting from the root. 
*/ +- n_block_number = SB_ROOT_BLOCK(sb); ++ block_number = SB_ROOT_BLOCK(sb); + expected_level = -1; + right_neighbor_of_leaf_node = 0; + +@@ -694,26 +691,26 @@ int search_by_key(struct super_block *sb + } + + /* ok, we have acquired next formatted node in the tree */ +- n_node_level = B_LEVEL(bh); ++ node_level = B_LEVEL(bh); + +- PROC_INFO_BH_STAT(sb, bh, n_node_level - 1); ++ PROC_INFO_BH_STAT(sb, bh, node_level - 1); + +- RFALSE(n_node_level < n_stop_level, ++ RFALSE(node_level < stop_level, + "vs-5152: tree level (%d) is less than stop level (%d)", +- n_node_level, n_stop_level); ++ node_level, stop_level); + +- n_retval = bin_search(key, B_N_PITEM_HEAD(bh, 0), ++ retval = bin_search(key, B_N_PITEM_HEAD(bh, 0), + B_NR_ITEMS(bh), +- (n_node_level == ++ (node_level == + DISK_LEAF_NODE_LEVEL) ? IH_SIZE : + KEY_SIZE, + &(last_element->pe_position)); +- if (n_node_level == n_stop_level) { +- return n_retval; ++ if (node_level == stop_level) { ++ return retval; + } + + /* we are not in the stop level */ +- if (n_retval == ITEM_FOUND) ++ if (retval == ITEM_FOUND) + /* item has been found, so we choose the pointer which is to the right of the found one */ + last_element->pe_position++; + +@@ -724,12 +721,12 @@ int search_by_key(struct super_block *sb + /* So we have chosen a position in the current node which is + an internal node. Now we calculate child block number by + position in the node. 
*/ +- n_block_number = ++ block_number = + B_N_CHILD_NUM(bh, last_element->pe_position); + + /* if we are going to read leaf nodes, try for read ahead as well */ + if ((search_path->reada & PATH_READA) && +- n_node_level == DISK_LEAF_NODE_LEVEL + 1) { ++ node_level == DISK_LEAF_NODE_LEVEL + 1) { + int pos = last_element->pe_position; + int limit = B_NR_ITEMS(bh); + struct reiserfs_key *le_key; +@@ -781,7 +778,7 @@ int search_for_position_by_key(struct su + ) + { + struct item_head *p_le_ih; /* pointer to on-disk structure */ +- int n_blk_size; ++ int blk_size; + loff_t item_offset, offset; + struct reiserfs_dir_entry de; + int retval; +@@ -816,7 +813,7 @@ int search_for_position_by_key(struct su + p_le_ih = + B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path), + --PATH_LAST_POSITION(search_path)); +- n_blk_size = sb->s_blocksize; ++ blk_size = sb->s_blocksize; + + if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) { + return FILE_NOT_FOUND; +@@ -828,10 +825,10 @@ int search_for_position_by_key(struct su + + /* Needed byte is contained in the item pointed to by the path. */ + if (item_offset <= offset && +- item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) { ++ item_offset + op_bytes_number(p_le_ih, blk_size) > offset) { + pos_in_item(search_path) = offset - item_offset; + if (is_indirect_le_ih(p_le_ih)) { +- pos_in_item(search_path) /= n_blk_size; ++ pos_in_item(search_path) /= blk_size; + } + return POSITION_FOUND; + } +@@ -891,7 +888,7 @@ static inline int prepare_for_direct_ite + if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { + // + round_len = ROUND_UP(new_file_length); +- /* this was n_new_file_length < le_ih ... */ ++ /* this was new_file_length < le_ih ... */ + if (round_len < le_ih_k_offset(le_ih)) { + *cut_size = -(IH_SIZE + ih_item_len(le_ih)); + return M_DELETE; /* Delete this item. 
*/ +@@ -953,7 +950,7 @@ static inline int prepare_for_direntry_i + This function returns a determination of what balance mode the calling function should employ. */ + static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed, /* Number of unformatted nodes which were removed + from end of the file. */ +- int *cut_size, unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */ ++ int *cut_size, unsigned long long new_file_length /* MAX_KEY_OFFSET in case of delete. */ + ) + { + struct super_block *sb = inode->i_sb; +@@ -965,7 +962,7 @@ static char prepare_for_delete_or_cut(st + /* Stat_data item. */ + if (is_statdata_le_ih(p_le_ih)) { + +- RFALSE(n_new_file_length != max_reiserfs_offset(inode), ++ RFALSE(new_file_length != max_reiserfs_offset(inode), + "PAP-5210: mode must be M_DELETE"); + + *cut_size = -(IH_SIZE + ih_item_len(p_le_ih)); +@@ -975,13 +972,13 @@ static char prepare_for_delete_or_cut(st + /* Directory item. */ + if (is_direntry_le_ih(p_le_ih)) + return prepare_for_direntry_item(path, p_le_ih, inode, +- n_new_file_length, ++ new_file_length, + cut_size); + + /* Direct item. */ + if (is_direct_le_ih(p_le_ih)) + return prepare_for_direct_item(path, p_le_ih, inode, +- n_new_file_length, cut_size); ++ new_file_length, cut_size); + + /* Case of an indirect item. 
*/ + { +@@ -992,10 +989,10 @@ static char prepare_for_delete_or_cut(st + int result = M_CUT; + int pos = 0; + +- if ( n_new_file_length == max_reiserfs_offset (inode) ) { ++ if ( new_file_length == max_reiserfs_offset (inode) ) { + /* prepare_for_delete_or_cut() is called by + * reiserfs_delete_item() */ +- n_new_file_length = 0; ++ new_file_length = 0; + delete = 1; + } + +@@ -1006,7 +1003,7 @@ static char prepare_for_delete_or_cut(st + copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); + pos = I_UNFM_NUM(&s_ih); + +- while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > n_new_file_length) { ++ while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) { + __le32 *unfm; + __u32 block; + +@@ -1062,35 +1059,34 @@ static char prepare_for_delete_or_cut(st + } + + /* Calculate number of bytes which will be deleted or cut during balance */ +-static int calc_deleted_bytes_number(struct tree_balance *tb, char c_mode) ++static int calc_deleted_bytes_number(struct tree_balance *tb, char mode) + { +- int n_del_size; ++ int del_size; + struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path); + + if (is_statdata_le_ih(p_le_ih)) + return 0; + +- n_del_size = +- (c_mode == ++ del_size = ++ (mode == + M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0]; + if (is_direntry_le_ih(p_le_ih)) { +- // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */ +- // we can't use EMPTY_DIR_SIZE, as old format dirs have a different +- // empty size. ick. FIXME, is this right? +- // +- return n_del_size; ++ /* return EMPTY_DIR_SIZE; We delete emty directoris only. ++ * we can't use EMPTY_DIR_SIZE, as old format dirs have a different ++ * empty size. ick. FIXME, is this right? 
*/ ++ return del_size; + } + + if (is_indirect_le_ih(p_le_ih)) +- n_del_size = (n_del_size / UNFM_P_SIZE) * ++ del_size = (del_size / UNFM_P_SIZE) * + (PATH_PLAST_BUFFER(tb->tb_path)->b_size); +- return n_del_size; ++ return del_size; + } + + static void init_tb_struct(struct reiserfs_transaction_handle *th, + struct tree_balance *tb, + struct super_block *sb, +- struct treepath *path, int n_size) ++ struct treepath *path, int size) + { + + BUG_ON(!th->t_trans_id); +@@ -1101,7 +1097,7 @@ static void init_tb_struct(struct reiser + tb->tb_path = path; + PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; + PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; +- tb->insert_size[0] = n_size; ++ tb->insert_size[0] = size; + } + + void padd_item(char *item, int total_length, int length) +@@ -1156,11 +1152,11 @@ int reiserfs_delete_item(struct reiserfs + struct item_head s_ih; + struct item_head *q_ih; + int quota_cut_bytes; +- int n_ret_value, n_del_size, n_removed; ++ int ret_value, del_size, removed; + + #ifdef CONFIG_REISERFS_CHECK +- char c_mode; +- int n_iter = 0; ++ char mode; ++ int iter = 0; + #endif + + BUG_ON(!th->t_trans_id); +@@ -1169,34 +1165,34 @@ int reiserfs_delete_item(struct reiserfs + 0 /*size is unknown */ ); + + while (1) { +- n_removed = 0; ++ removed = 0; + + #ifdef CONFIG_REISERFS_CHECK +- n_iter++; +- c_mode = ++ iter++; ++ mode = + #endif + prepare_for_delete_or_cut(th, inode, path, +- item_key, &n_removed, +- &n_del_size, ++ item_key, &removed, ++ &del_size, + max_reiserfs_offset(inode)); + +- RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); ++ RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); + + copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); +- s_del_balance.insert_size[0] = n_del_size; ++ s_del_balance.insert_size[0] = del_size; + +- n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); +- if (n_ret_value != REPEAT_SEARCH) ++ ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); ++ 
if (ret_value != REPEAT_SEARCH) + break; + + PROC_INFO_INC(sb, delete_item_restarted); + + // file system changed, repeat search +- n_ret_value = ++ ret_value = + search_for_position_by_key(sb, item_key, path); +- if (n_ret_value == IO_ERROR) ++ if (ret_value == IO_ERROR) + break; +- if (n_ret_value == FILE_NOT_FOUND) { ++ if (ret_value == FILE_NOT_FOUND) { + reiserfs_warning(sb, "vs-5340", + "no items of the file %K found", + item_key); +@@ -1204,12 +1200,12 @@ int reiserfs_delete_item(struct reiserfs + } + } /* while (1) */ + +- if (n_ret_value != CARRY_ON) { ++ if (ret_value != CARRY_ON) { + unfix_nodes(&s_del_balance); + return 0; + } + // reiserfs_delete_item returns item length when success +- n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); ++ ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); + q_ih = get_ih(path); + quota_cut_bytes = ih_item_len(q_ih); + +@@ -1255,7 +1251,7 @@ int reiserfs_delete_item(struct reiserfs + off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); + memcpy(data + off, + B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih), +- n_ret_value); ++ ret_value); + kunmap_atomic(data, KM_USER0); + } + /* Perform balancing after all resources have been collected at once. 
*/ +@@ -1269,7 +1265,7 @@ int reiserfs_delete_item(struct reiserfs + DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes); + + /* Return deleted body length */ +- return n_ret_value; ++ return ret_value; + } + + /* Summary Of Mechanisms For Handling Collisions Between Processes: +@@ -1432,13 +1428,13 @@ static int maybe_indirect_to_direct(stru + struct page *page, + struct treepath *path, + const struct cpu_key *item_key, +- loff_t n_new_file_size, char *mode) ++ loff_t new_file_size, char *mode) + { + struct super_block *sb = inode->i_sb; +- int n_block_size = sb->s_blocksize; ++ int block_size = sb->s_blocksize; + int cut_bytes; + BUG_ON(!th->t_trans_id); +- BUG_ON(n_new_file_size != inode->i_size); ++ BUG_ON(new_file_size != inode->i_size); + + /* the page being sent in could be NULL if there was an i/o error + ** reading in the last block. The user will hit problems trying to +@@ -1450,15 +1446,15 @@ static int maybe_indirect_to_direct(stru + /* leave tail in an unformatted node */ + *mode = M_SKIP_BALANCING; + cut_bytes = +- n_block_size - (n_new_file_size & (n_block_size - 1)); ++ block_size - (new_file_size & (block_size - 1)); + pathrelse(path); + return cut_bytes; + } + /* Perform the conversion to a direct_item. */ + /* return indirect_to_direct(inode, path, item_key, +- n_new_file_size, mode); */ ++ new_file_size, mode); */ + return indirect2direct(th, inode, page, path, item_key, +- n_new_file_size, mode); ++ new_file_size, mode); + } + + /* we did indirect_to_direct conversion. And we have inserted direct +@@ -1512,7 +1508,7 @@ int reiserfs_cut_from_item(struct reiser + struct treepath *path, + struct cpu_key *item_key, + struct inode *inode, +- struct page *page, loff_t n_new_file_size) ++ struct page *page, loff_t new_file_size) + { + struct super_block *sb = inode->i_sb; + /* Every function which is going to call do_balance must first +@@ -1521,10 +1517,10 @@ int reiserfs_cut_from_item(struct reiser + After that we can make tree balancing. 
*/ + struct tree_balance s_cut_balance; + struct item_head *p_le_ih; +- int n_cut_size = 0, /* Amount to be cut. */ +- n_ret_value = CARRY_ON, n_removed = 0, /* Number of the removed unformatted nodes. */ +- n_is_inode_locked = 0; +- char c_mode; /* Mode of the balance. */ ++ int cut_size = 0, /* Amount to be cut. */ ++ ret_value = CARRY_ON, removed = 0, /* Number of the removed unformatted nodes. */ ++ is_inode_locked = 0; ++ char mode; /* Mode of the balance. */ + int retval2 = -1; + int quota_cut_bytes; + loff_t tail_pos = 0; +@@ -1532,7 +1528,7 @@ int reiserfs_cut_from_item(struct reiser + BUG_ON(!th->t_trans_id); + + init_tb_struct(th, &s_cut_balance, inode->i_sb, path, +- n_cut_size); ++ cut_size); + + /* Repeat this loop until we either cut the item without needing + to balance, or we fix_nodes without schedule occurring */ +@@ -1542,30 +1538,30 @@ int reiserfs_cut_from_item(struct reiser + free unformatted nodes which are pointed to by the cut + pointers. */ + +- c_mode = ++ mode = + prepare_for_delete_or_cut(th, inode, path, +- item_key, &n_removed, +- &n_cut_size, n_new_file_size); +- if (c_mode == M_CONVERT) { ++ item_key, &removed, ++ &cut_size, new_file_size); ++ if (mode == M_CONVERT) { + /* convert last unformatted node to direct item or leave + tail in the unformatted node */ +- RFALSE(n_ret_value != CARRY_ON, ++ RFALSE(ret_value != CARRY_ON, + "PAP-5570: can not convert twice"); + +- n_ret_value = ++ ret_value = + maybe_indirect_to_direct(th, inode, page, + path, item_key, +- n_new_file_size, &c_mode); +- if (c_mode == M_SKIP_BALANCING) ++ new_file_size, &mode); ++ if (mode == M_SKIP_BALANCING) + /* tail has been left in the unformatted node */ +- return n_ret_value; ++ return ret_value; + +- n_is_inode_locked = 1; ++ is_inode_locked = 1; + + /* removing of last unformatted node will change value we + have to return to truncate. 
Save it */ +- retval2 = n_ret_value; +- /*retval2 = sb->s_blocksize - (n_new_file_size & (sb->s_blocksize - 1)); */ ++ retval2 = ret_value; ++ /*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */ + + /* So, we have performed the first part of the conversion: + inserting the new direct item. Now we are removing the +@@ -1573,10 +1569,10 @@ int reiserfs_cut_from_item(struct reiser + it. */ + set_cpu_key_k_type(item_key, TYPE_INDIRECT); + item_key->key_length = 4; +- n_new_file_size -= +- (n_new_file_size & (sb->s_blocksize - 1)); +- tail_pos = n_new_file_size; +- set_cpu_key_k_offset(item_key, n_new_file_size + 1); ++ new_file_size -= ++ (new_file_size & (sb->s_blocksize - 1)); ++ tail_pos = new_file_size; ++ set_cpu_key_k_offset(item_key, new_file_size + 1); + if (search_for_position_by_key + (sb, item_key, + path) == POSITION_NOT_FOUND) { +@@ -1589,38 +1585,38 @@ int reiserfs_cut_from_item(struct reiser + } + continue; + } +- if (n_cut_size == 0) { ++ if (cut_size == 0) { + pathrelse(path); + return 0; + } + +- s_cut_balance.insert_size[0] = n_cut_size; ++ s_cut_balance.insert_size[0] = cut_size; + +- n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL); +- if (n_ret_value != REPEAT_SEARCH) ++ ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL); ++ if (ret_value != REPEAT_SEARCH) + break; + + PROC_INFO_INC(sb, cut_from_item_restarted); + +- n_ret_value = ++ ret_value = + search_for_position_by_key(sb, item_key, path); +- if (n_ret_value == POSITION_FOUND) ++ if (ret_value == POSITION_FOUND) + continue; + + reiserfs_warning(sb, "PAP-5610", "item %K not found", + item_key); + unfix_nodes(&s_cut_balance); +- return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT; ++ return (ret_value == IO_ERROR) ? 
-EIO : -ENOENT; + } /* while */ + + // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) +- if (n_ret_value != CARRY_ON) { +- if (n_is_inode_locked) { ++ if (ret_value != CARRY_ON) { ++ if (is_inode_locked) { + // FIXME: this seems to be not needed: we are always able + // to cut item + indirect_to_direct_roll_back(th, inode, path); + } +- if (n_ret_value == NO_DISK_SPACE) ++ if (ret_value == NO_DISK_SPACE) + reiserfs_warning(sb, "reiserfs-5092", + "NO_DISK_SPACE"); + unfix_nodes(&s_cut_balance); +@@ -1629,24 +1625,24 @@ int reiserfs_cut_from_item(struct reiser + + /* go ahead and perform balancing */ + +- RFALSE(c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode"); ++ RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode"); + + /* Calculate number of bytes that need to be cut from the item. */ + quota_cut_bytes = +- (c_mode == ++ (mode == + M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance. + insert_size[0]; + if (retval2 == -1) +- n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode); ++ ret_value = calc_deleted_bytes_number(&s_cut_balance, mode); + else +- n_ret_value = retval2; ++ ret_value = retval2; + + /* For direct items, we only change the quota when deleting the last + ** item. + */ + p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path); + if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) { +- if (c_mode == M_DELETE && ++ if (mode == M_DELETE && + (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == + 1) { + // FIXME: this is to keep 3.5 happy +@@ -1657,7 +1653,7 @@ int reiserfs_cut_from_item(struct reiser + } + } + #ifdef CONFIG_REISERFS_CHECK +- if (n_is_inode_locked) { ++ if (is_inode_locked) { + struct item_head *le_ih = + PATH_PITEM_HEAD(s_cut_balance.tb_path); + /* we are going to complete indirect2direct conversion. 
Make +@@ -1667,13 +1663,13 @@ int reiserfs_cut_from_item(struct reiser + reiserfs_panic(sb, "vs-5652", + "item must be indirect %h", le_ih); + +- if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) ++ if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) + reiserfs_panic(sb, "vs-5653", "completing " + "indirect2direct conversion indirect " + "item %h being deleted must be of " + "4 byte long", le_ih); + +- if (c_mode == M_CUT ++ if (mode == M_CUT + && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { + reiserfs_panic(sb, "vs-5654", "can not complete " + "indirect2direct conversion of %h " +@@ -1685,8 +1681,8 @@ int reiserfs_cut_from_item(struct reiser + } + #endif + +- do_balance(&s_cut_balance, NULL, NULL, c_mode); +- if (n_is_inode_locked) { ++ do_balance(&s_cut_balance, NULL, NULL, mode); ++ if (is_inode_locked) { + /* we've done an indirect->direct conversion. when the data block + ** was freed, it was removed from the list of blocks that must + ** be flushed before the transaction commits, make sure to +@@ -1701,7 +1697,7 @@ int reiserfs_cut_from_item(struct reiser + quota_cut_bytes, inode->i_uid, '?'); + #endif + DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes); +- return n_ret_value; ++ return ret_value; + } + + static void truncate_directory(struct reiserfs_transaction_handle *th, +@@ -1733,9 +1729,9 @@ int reiserfs_do_truncate(struct reiserfs + INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ + struct item_head *p_le_ih; /* Pointer to an item header. */ + struct cpu_key s_item_key; /* Key to search for a previous file item. */ +- loff_t n_file_size, /* Old file size. */ +- n_new_file_size; /* New file size. */ +- int n_deleted; /* Number of deleted or truncated bytes. */ ++ loff_t file_size, /* Old file size. */ ++ new_file_size; /* New file size. */ ++ int deleted; /* Number of deleted or truncated bytes. 
*/ + int retval; + int err = 0; + +@@ -1752,7 +1748,7 @@ int reiserfs_do_truncate(struct reiserfs + } + + /* Get new file size. */ +- n_new_file_size = inode->i_size; ++ new_file_size = inode->i_size; + + // FIXME: note, that key type is unimportant here + make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode), +@@ -1782,7 +1778,7 @@ int reiserfs_do_truncate(struct reiserfs + /* Get real file size (total length of all file items) */ + p_le_ih = PATH_PITEM_HEAD(&s_search_path); + if (is_statdata_le_ih(p_le_ih)) +- n_file_size = 0; ++ file_size = 0; + else { + loff_t offset = le_ih_k_offset(p_le_ih); + int bytes = +@@ -1791,42 +1787,42 @@ int reiserfs_do_truncate(struct reiserfs + /* this may mismatch with real file size: if last direct item + had no padding zeros and last unformatted node had no free + space, this file would have this file size */ +- n_file_size = offset + bytes - 1; ++ file_size = offset + bytes - 1; + } + /* + * are we doing a full truncate or delete, if so + * kick in the reada code + */ +- if (n_new_file_size == 0) ++ if (new_file_size == 0) + s_search_path.reada = PATH_READA | PATH_READA_BACK; + +- if (n_file_size == 0 || n_file_size < n_new_file_size) { ++ if (file_size == 0 || file_size < new_file_size) { + goto update_and_out; + } + + /* Update key to search for the last file item. */ +- set_cpu_key_k_offset(&s_item_key, n_file_size); ++ set_cpu_key_k_offset(&s_item_key, file_size); + + do { + /* Cut or delete file item. 
*/ +- n_deleted = ++ deleted = + reiserfs_cut_from_item(th, &s_search_path, &s_item_key, +- inode, page, n_new_file_size); +- if (n_deleted < 0) { ++ inode, page, new_file_size); ++ if (deleted < 0) { + reiserfs_warning(inode->i_sb, "vs-5665", + "reiserfs_cut_from_item failed"); + reiserfs_check_path(&s_search_path); + return 0; + } + +- RFALSE(n_deleted > n_file_size, ++ RFALSE(deleted > file_size, + "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K", +- n_deleted, n_file_size, &s_item_key); ++ deleted, file_size, &s_item_key); + + /* Change key to search the last file item. */ +- n_file_size -= n_deleted; ++ file_size -= deleted; + +- set_cpu_key_k_offset(&s_item_key, n_file_size); ++ set_cpu_key_k_offset(&s_item_key, file_size); + + /* While there are bytes to truncate and previous file item is presented in the tree. */ + +@@ -1857,13 +1853,13 @@ int reiserfs_do_truncate(struct reiserfs + goto out; + reiserfs_update_inode_transaction(inode); + } +- } while (n_file_size > ROUND_UP(n_new_file_size) && ++ } while (file_size > ROUND_UP(new_file_size) && + search_for_position_by_key(inode->i_sb, &s_item_key, + &s_search_path) == POSITION_FOUND); + +- RFALSE(n_file_size > ROUND_UP(n_new_file_size), ++ RFALSE(file_size > ROUND_UP(new_file_size), + "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d", +- n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid); ++ new_file_size, file_size, s_item_key.on_disk_key.k_objectid); + + update_and_out: + if (update_timestamps) { +@@ -1918,7 +1914,7 @@ int reiserfs_paste_into_item(struct reis + const struct cpu_key *key, /* Key to search for the needed item. */ + struct inode *inode, /* Inode item belongs to */ + const char *body, /* Pointer to the bytes to paste. */ +- int n_pasted_size) ++ int pasted_size) + { /* Size of pasted bytes. 
*/ + struct tree_balance s_paste_balance; + int retval; +@@ -1931,16 +1927,16 @@ int reiserfs_paste_into_item(struct reis + #ifdef REISERQUOTA_DEBUG + reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, + "reiserquota paste_into_item(): allocating %u id=%u type=%c", +- n_pasted_size, inode->i_uid, ++ pasted_size, inode->i_uid, + key2type(&(key->on_disk_key))); + #endif + +- if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) { ++ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, pasted_size)) { + pathrelse(search_path); + return -EDQUOT; + } + init_tb_struct(th, &s_paste_balance, th->t_super, search_path, +- n_pasted_size); ++ pasted_size); + #ifdef DISPLACE_NEW_PACKING_LOCALITIES + s_paste_balance.key = key->on_disk_key; + #endif +@@ -1988,10 +1984,10 @@ int reiserfs_paste_into_item(struct reis + #ifdef REISERQUOTA_DEBUG + reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, + "reiserquota paste_into_item(): freeing %u id=%u type=%c", +- n_pasted_size, inode->i_uid, ++ pasted_size, inode->i_uid, + key2type(&(key->on_disk_key))); + #endif +- DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); ++ DQUOT_FREE_SPACE_NODIRTY(inode, pasted_size); + return retval; + } + +--- a/fs/reiserfs/tail_conversion.c ++++ b/fs/reiserfs/tail_conversion.c +@@ -26,7 +26,7 @@ int direct2indirect(struct reiserfs_tran + converted item. */ + struct item_head ind_ih; /* new indirect item to be inserted or + key of unfm pointer to be pasted */ +- int n_blk_size, n_retval; /* returned value for reiserfs_insert_item and clones */ ++ int blk_size, retval; /* returned value for reiserfs_insert_item and clones */ + unp_t unfm_ptr; /* Handle on an unformatted node + that will be inserted in the + tree. */ +@@ -35,7 +35,7 @@ int direct2indirect(struct reiserfs_tran + + REISERFS_SB(sb)->s_direct2indirect++; + +- n_blk_size = sb->s_blocksize; ++ blk_size = sb->s_blocksize; + + /* and key to search for append or insert pointer to the new + unformatted node. 
*/ +@@ -64,17 +64,17 @@ int direct2indirect(struct reiserfs_tran + set_ih_free_space(&ind_ih, 0); /* delete at nearest future */ + put_ih_item_len(&ind_ih, UNFM_P_SIZE); + PATH_LAST_POSITION(path)++; +- n_retval = ++ retval = + reiserfs_insert_item(th, path, &end_key, &ind_ih, inode, + (char *)&unfm_ptr); + } else { + /* Paste into last indirect item of an object. */ +- n_retval = reiserfs_paste_into_item(th, path, &end_key, inode, ++ retval = reiserfs_paste_into_item(th, path, &end_key, inode, + (char *)&unfm_ptr, + UNFM_P_SIZE); + } +- if (n_retval) { +- return n_retval; ++ if (retval) { ++ return retval; + } + // note: from here there are two keys which have matching first + // three key components. They only differ by the fourth one. +@@ -98,7 +98,7 @@ int direct2indirect(struct reiserfs_tran + RFALSE(!is_direct_le_ih(p_le_ih), + "vs-14055: direct item expected(%K), found %h", + &end_key, p_le_ih); +- tail_size = (le_ih_k_offset(p_le_ih) & (n_blk_size - 1)) ++ tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1)) + + ih_item_len(p_le_ih) - 1; + + /* we only send the unbh pointer if the buffer is not up to date. 
+@@ -113,11 +113,11 @@ int direct2indirect(struct reiserfs_tran + } else { + up_to_date_bh = unbh; + } +- n_retval = reiserfs_delete_item(th, path, &end_key, inode, ++ retval = reiserfs_delete_item(th, path, &end_key, inode, + up_to_date_bh); + +- total_tail += n_retval; +- if (tail_size == n_retval) ++ total_tail += retval; ++ if (tail_size == retval) + // done: file does not have direct items anymore + break; + +@@ -129,7 +129,7 @@ int direct2indirect(struct reiserfs_tran + unsigned pgoff = + (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); + char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0); +- memset(kaddr + pgoff, 0, n_blk_size - total_tail); ++ memset(kaddr + pgoff, 0, blk_size - total_tail); + kunmap_atomic(kaddr, KM_USER0); + } + +@@ -181,7 +181,7 @@ int indirect2direct(struct reiserfs_tran + { + struct super_block *sb = inode->i_sb; + struct item_head s_ih; +- unsigned long n_block_size = sb->s_blocksize; ++ unsigned long block_size = sb->s_blocksize; + char *tail; + int tail_len, round_tail_len; + loff_t pos, pos1; /* position of first byte of the tail */ +@@ -196,7 +196,7 @@ int indirect2direct(struct reiserfs_tran + /* store item head path points to. */ + copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); + +- tail_len = (n_new_file_size & (n_block_size - 1)); ++ tail_len = (n_new_file_size & (block_size - 1)); + if (get_inode_sd_version(inode) == STAT_DATA_V2) + round_tail_len = ROUND_UP(tail_len); + else +@@ -257,7 +257,7 @@ int indirect2direct(struct reiserfs_tran + unformatted node. 
For now i_size is considered as guard for + going out of file size */ + kunmap(page); +- return n_block_size - round_tail_len; ++ return block_size - round_tail_len; + } + kunmap(page); + +@@ -276,5 +276,5 @@ int indirect2direct(struct reiserfs_tran + /* mark_file_with_tail (inode, pos1 + 1); */ + REISERFS_I(inode)->i_first_direct_byte = pos1 + 1; + +- return n_block_size - round_tail_len; ++ return block_size - round_tail_len; + } diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_._.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_._.diff new file mode 100644 index 0000000000..c40136a702 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_._.diff @@ -0,0 +1,1816 @@ +From: Jeff Mahoney +Subject: reiserfs: rename p_._ variables + + This patch is a simple s/p_._//g to the reiserfs code. This is the fifth + in a series of patches to rip out some of the awful variable naming in + reiserfs. + +Signed-off-by: Jeff Mahoney + +--- + + fs/reiserfs/file.c | 6 + fs/reiserfs/fix_node.c | 169 +++++++-------- + fs/reiserfs/stree.c | 472 +++++++++++++++++++++--------------------- + fs/reiserfs/tail_conversion.c | 28 +- + include/linux/reiserfs_fs.h | 46 ++-- + 5 files changed, 365 insertions(+), 356 deletions(-) + +--- a/fs/reiserfs/file.c ++++ b/fs/reiserfs/file.c +@@ -134,10 +134,10 @@ static void reiserfs_vfs_truncate_file(s + * be removed... + */ + +-static int reiserfs_sync_file(struct file *p_s_filp, +- struct dentry *p_s_dentry, int datasync) ++static int reiserfs_sync_file(struct file *filp, ++ struct dentry *dentry, int datasync) + { +- struct inode *inode = p_s_dentry->d_inode; ++ struct inode *inode = dentry->d_inode; + int n_err; + int barrier_done; + +--- a/fs/reiserfs/fix_node.c ++++ b/fs/reiserfs/fix_node.c +@@ -780,9 +780,9 @@ static void free_buffers_in_tb(struct tr + /* The function is NOT SCHEDULE-SAFE! 
*/ + static int get_empty_nodes(struct tree_balance *tb, int n_h) + { +- struct buffer_head *p_s_new_bh, +- *p_s_Sh = PATH_H_PBUFFER(tb->tb_path, n_h); +- b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; ++ struct buffer_head *new_bh, ++ *Sh = PATH_H_PBUFFER(tb->tb_path, n_h); ++ b_blocknr_t *blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; + int n_counter, n_number_of_freeblk, n_amount_needed, /* number of needed empty blocks */ + n_retval = CARRY_ON; + struct super_block *sb = tb->tb_sb; +@@ -810,8 +810,8 @@ static int get_empty_nodes(struct tree_b + 1) : 0; + + /* Allocate missing empty blocks. */ +- /* if p_s_Sh == 0 then we are getting a new root */ +- n_amount_needed = (p_s_Sh) ? (tb->blknum[n_h] - 1) : 1; ++ /* if Sh == 0 then we are getting a new root */ ++ n_amount_needed = (Sh) ? (tb->blknum[n_h] - 1) : 1; + /* Amount_needed = the amount that we need more than the amount that we have. */ + if (n_amount_needed > n_number_of_freeblk) + n_amount_needed -= n_number_of_freeblk; +@@ -824,25 +824,25 @@ static int get_empty_nodes(struct tree_b + return NO_DISK_SPACE; + + /* for each blocknumber we just got, get a buffer and stick it on FEB */ +- for (p_n_blocknr = a_n_blocknrs, n_counter = 0; +- n_counter < n_amount_needed; p_n_blocknr++, n_counter++) { ++ for (blocknr = a_n_blocknrs, n_counter = 0; ++ n_counter < n_amount_needed; blocknr++, n_counter++) { + +- RFALSE(!*p_n_blocknr, ++ RFALSE(!*blocknr, + "PAP-8135: reiserfs_new_blocknrs failed when got new blocks"); + +- p_s_new_bh = sb_getblk(sb, *p_n_blocknr); +- RFALSE(buffer_dirty(p_s_new_bh) || +- buffer_journaled(p_s_new_bh) || +- buffer_journal_dirty(p_s_new_bh), ++ new_bh = sb_getblk(sb, *blocknr); ++ RFALSE(buffer_dirty(new_bh) || ++ buffer_journaled(new_bh) || ++ buffer_journal_dirty(new_bh), + "PAP-8140: journlaled or dirty buffer %b for the new block", +- p_s_new_bh); ++ new_bh); + + /* Put empty buffers into the array. 
*/ + RFALSE(tb->FEB[tb->cur_blknum], + "PAP-8141: busy slot for new buffer"); + +- set_buffer_journal_new(p_s_new_bh); +- tb->FEB[tb->cur_blknum++] = p_s_new_bh; ++ set_buffer_journal_new(new_bh); ++ tb->FEB[tb->cur_blknum++] = new_bh; + } + + if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb)) +@@ -898,7 +898,7 @@ static int get_rfree(struct tree_balance + /* Check whether left neighbor is in memory. */ + static int is_left_neighbor_in_cache(struct tree_balance *tb, int n_h) + { +- struct buffer_head *p_s_father, *left; ++ struct buffer_head *father, *left; + struct super_block *sb = tb->tb_sb; + b_blocknr_t n_left_neighbor_blocknr; + int n_left_neighbor_position; +@@ -908,18 +908,18 @@ static int is_left_neighbor_in_cache(str + return 0; + + /* Calculate father of the node to be balanced. */ +- p_s_father = PATH_H_PBUFFER(tb->tb_path, n_h + 1); ++ father = PATH_H_PBUFFER(tb->tb_path, n_h + 1); + +- RFALSE(!p_s_father || +- !B_IS_IN_TREE(p_s_father) || ++ RFALSE(!father || ++ !B_IS_IN_TREE(father) || + !B_IS_IN_TREE(tb->FL[n_h]) || +- !buffer_uptodate(p_s_father) || ++ !buffer_uptodate(father) || + !buffer_uptodate(tb->FL[n_h]), + "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", +- p_s_father, tb->FL[n_h]); ++ father, tb->FL[n_h]); + + /* Get position of the pointer to the left neighbor into the left father. */ +- n_left_neighbor_position = (p_s_father == tb->FL[n_h]) ? ++ n_left_neighbor_position = (father == tb->FL[n_h]) ? + tb->lkey[n_h] : B_NR_ITEMS(tb->FL[n_h]); + /* Get left neighbor block number. 
*/ + n_left_neighbor_blocknr = +@@ -940,10 +940,10 @@ static int is_left_neighbor_in_cache(str + #define LEFT_PARENTS 'l' + #define RIGHT_PARENTS 'r' + +-static void decrement_key(struct cpu_key *p_s_key) ++static void decrement_key(struct cpu_key *key) + { + // call item specific function for this key +- item_ops[cpu_key_k_type(p_s_key)]->decrement_key(p_s_key); ++ item_ops[cpu_key_k_type(key)]->decrement_key(key); + } + + /* Calculate far left/right parent of the left/right neighbor of the current node, that +@@ -956,17 +956,17 @@ static void decrement_key(struct cpu_key + */ + static int get_far_parent(struct tree_balance *tb, + int n_h, +- struct buffer_head **pp_s_father, +- struct buffer_head **pp_s_com_father, char c_lr_par) ++ struct buffer_head **pfather, ++ struct buffer_head **pcom_father, char c_lr_par) + { +- struct buffer_head *p_s_parent; ++ struct buffer_head *parent; + INITIALIZE_PATH(s_path_to_neighbor_father); +- struct treepath *p_s_path = tb->tb_path; ++ struct treepath *path = tb->tb_path; + struct cpu_key s_lr_father_key; + int n_counter, + n_position = INT_MAX, + n_first_last_position = 0, +- n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h); ++ n_path_offset = PATH_H_PATH_OFFSET(path, n_h); + + /* Starting from F[n_h] go upwards in the tree, and look for the common + ancestor of F[n_h], and its neighbor l/r, that should be obtained. */ +@@ -979,25 +979,25 @@ static int get_far_parent(struct tree_ba + for (; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--) { + /* Check whether parent of the current buffer in the path is really parent in the tree. */ + if (!B_IS_IN_TREE +- (p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1))) ++ (parent = PATH_OFFSET_PBUFFER(path, n_counter - 1))) + return REPEAT_SEARCH; + /* Check whether position in the parent is correct. 
*/ + if ((n_position = +- PATH_OFFSET_POSITION(p_s_path, ++ PATH_OFFSET_POSITION(path, + n_counter - 1)) > +- B_NR_ITEMS(p_s_parent)) ++ B_NR_ITEMS(parent)) + return REPEAT_SEARCH; + /* Check whether parent at the path really points to the child. */ +- if (B_N_CHILD_NUM(p_s_parent, n_position) != +- PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr) ++ if (B_N_CHILD_NUM(parent, n_position) != ++ PATH_OFFSET_PBUFFER(path, n_counter)->b_blocknr) + return REPEAT_SEARCH; + /* Return delimiting key if position in the parent is not equal to first/last one. */ + if (c_lr_par == RIGHT_PARENTS) +- n_first_last_position = B_NR_ITEMS(p_s_parent); ++ n_first_last_position = B_NR_ITEMS(parent); + if (n_position != n_first_last_position) { +- *pp_s_com_father = p_s_parent; +- get_bh(*pp_s_com_father); +- /*(*pp_s_com_father = p_s_parent)->b_count++; */ ++ *pcom_father = parent; ++ get_bh(*pcom_father); ++ /*(*pcom_father = parent)->b_count++; */ + break; + } + } +@@ -1009,22 +1009,22 @@ static int get_far_parent(struct tree_ba + (tb->tb_path, + FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == + SB_ROOT_BLOCK(tb->tb_sb)) { +- *pp_s_father = *pp_s_com_father = NULL; ++ *pfather = *pcom_father = NULL; + return CARRY_ON; + } + return REPEAT_SEARCH; + } + +- RFALSE(B_LEVEL(*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL, ++ RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL, + "PAP-8185: (%b %z) level too small", +- *pp_s_com_father, *pp_s_com_father); ++ *pcom_father, *pcom_father); + + /* Check whether the common parent is locked. */ + +- if (buffer_locked(*pp_s_com_father)) { +- __wait_on_buffer(*pp_s_com_father); ++ if (buffer_locked(*pcom_father)) { ++ __wait_on_buffer(*pcom_father); + if (FILESYSTEM_CHANGED_TB(tb)) { +- brelse(*pp_s_com_father); ++ brelse(*pcom_father); + return REPEAT_SEARCH; + } + } +@@ -1034,7 +1034,7 @@ static int get_far_parent(struct tree_ba + + /* Form key to get parent of the left/right neighbor. 
*/ + le_key2cpu_key(&s_lr_father_key, +- B_N_PDELIM_KEY(*pp_s_com_father, ++ B_N_PDELIM_KEY(*pcom_father, + (c_lr_par == + LEFT_PARENTS) ? (tb->lkey[n_h - 1] = + n_position - +@@ -1053,14 +1053,14 @@ static int get_far_parent(struct tree_ba + + if (FILESYSTEM_CHANGED_TB(tb)) { + pathrelse(&s_path_to_neighbor_father); +- brelse(*pp_s_com_father); ++ brelse(*pcom_father); + return REPEAT_SEARCH; + } + +- *pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); ++ *pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); + +- RFALSE(B_LEVEL(*pp_s_father) != n_h + 1, +- "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father); ++ RFALSE(B_LEVEL(*pfather) != n_h + 1, ++ "PAP-8190: (%b %z) level too small", *pfather, *pfather); + RFALSE(s_path_to_neighbor_father.path_length < + FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small"); + +@@ -1078,11 +1078,11 @@ static int get_far_parent(struct tree_ba + */ + static int get_parents(struct tree_balance *tb, int n_h) + { +- struct treepath *p_s_path = tb->tb_path; ++ struct treepath *path = tb->tb_path; + int n_position, + n_ret_value, + n_path_offset = PATH_H_PATH_OFFSET(tb->tb_path, n_h); +- struct buffer_head *p_s_curf, *p_s_curcf; ++ struct buffer_head *curf, *curcf; + + /* Current node is the root of the tree or will be root of the tree */ + if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) { +@@ -1100,66 +1100,65 @@ static int get_parents(struct tree_balan + } + + /* Get parent FL[n_path_offset] of L[n_path_offset]. */ +- if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) { ++ n_position = PATH_OFFSET_POSITION(path, n_path_offset - 1); ++ if (n_position) { + /* Current node is not the first child of its parent. 
*/ +- /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */ +- p_s_curf = p_s_curcf = +- PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); +- get_bh(p_s_curf); +- get_bh(p_s_curf); ++ curf = PATH_OFFSET_PBUFFER(path, n_path_offset - 1); ++ curcf = PATH_OFFSET_PBUFFER(path, n_path_offset - 1); ++ get_bh(curf); ++ get_bh(curf); + tb->lkey[n_h] = n_position - 1; + } else { + /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node. + Calculate current common parent of L[n_path_offset] and the current node. Note that + CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset]. + Calculate lkey[n_path_offset]. */ +- if ((n_ret_value = get_far_parent(tb, n_h + 1, &p_s_curf, +- &p_s_curcf, ++ if ((n_ret_value = get_far_parent(tb, n_h + 1, &curf, ++ &curcf, + LEFT_PARENTS)) != CARRY_ON) + return n_ret_value; + } + + brelse(tb->FL[n_h]); +- tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */ ++ tb->FL[n_h] = curf; /* New initialization of FL[n_h]. */ + brelse(tb->CFL[n_h]); +- tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */ ++ tb->CFL[n_h] = curcf; /* New initialization of CFL[n_h]. */ + +- RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) || +- (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)), +- "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf); ++ RFALSE((curf && !B_IS_IN_TREE(curf)) || ++ (curcf && !B_IS_IN_TREE(curcf)), ++ "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf); + + /* Get parent FR[n_h] of R[n_h]. */ + + /* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */ +- if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(p_s_path, n_h + 1))) { ++ if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(path, n_h + 1))) { + /* Calculate current parent of R[n_h], which is the right neighbor of F[n_h]. + Calculate current common parent of R[n_h] and current node. 
Note that CFR[n_h] + not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */ + if ((n_ret_value = +- get_far_parent(tb, n_h + 1, &p_s_curf, &p_s_curcf, ++ get_far_parent(tb, n_h + 1, &curf, &curcf, + RIGHT_PARENTS)) != CARRY_ON) + return n_ret_value; + } else { + /* Current node is not the last child of its parent F[n_h]. */ +- /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */ +- p_s_curf = p_s_curcf = +- PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); +- get_bh(p_s_curf); +- get_bh(p_s_curf); ++ curf = PATH_OFFSET_PBUFFER(path, n_path_offset - 1); ++ curcf = PATH_OFFSET_PBUFFER(path, n_path_offset - 1); ++ get_bh(curf); ++ get_bh(curf); + tb->rkey[n_h] = n_position; + } + + brelse(tb->FR[n_h]); + /* New initialization of FR[n_path_offset]. */ +- tb->FR[n_h] = p_s_curf; ++ tb->FR[n_h] = curf; + + brelse(tb->CFR[n_h]); + /* New initialization of CFR[n_path_offset]. */ +- tb->CFR[n_h] = p_s_curcf; ++ tb->CFR[n_h] = curcf; + +- RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) || +- (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)), +- "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf); ++ RFALSE((curf && !B_IS_IN_TREE(curf)) || ++ (curcf && !B_IS_IN_TREE(curcf)), ++ "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf); + + return CARRY_ON; + } +@@ -1893,7 +1892,7 @@ static int check_balance(int mode, + static int get_direct_parent(struct tree_balance *tb, int n_h) + { + struct buffer_head *bh; +- struct treepath *p_s_path = tb->tb_path; ++ struct treepath *path = tb->tb_path; + int n_position, + n_path_offset = PATH_H_PATH_OFFSET(tb->tb_path, n_h); + +@@ -1903,27 +1902,27 @@ static int get_direct_parent(struct tree + RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1, + "PAP-8260: invalid offset in the path"); + +- if (PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)-> ++ if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)-> + b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) { + /* Root is not changed. 
*/ +- PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL; +- PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0; ++ PATH_OFFSET_PBUFFER(path, n_path_offset - 1) = NULL; ++ PATH_OFFSET_POSITION(path, n_path_offset - 1) = 0; + return CARRY_ON; + } + return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ + } + + if (!B_IS_IN_TREE +- (bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))) ++ (bh = PATH_OFFSET_PBUFFER(path, n_path_offset - 1))) + return REPEAT_SEARCH; /* Parent in the path is not in the tree. */ + + if ((n_position = +- PATH_OFFSET_POSITION(p_s_path, ++ PATH_OFFSET_POSITION(path, + n_path_offset - 1)) > B_NR_ITEMS(bh)) + return REPEAT_SEARCH; + + if (B_N_CHILD_NUM(bh, n_position) != +- PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr) ++ PATH_OFFSET_PBUFFER(path, n_path_offset)->b_blocknr) + /* Parent in the path is not parent of the current node in the tree. */ + return REPEAT_SEARCH; + +@@ -2319,7 +2318,7 @@ static int wait_tb_buffers_until_unlocke + */ + + int fix_nodes(int n_op_mode, struct tree_balance *tb, +- struct item_head *p_s_ins_ih, const void *data) ++ struct item_head *ins_ih, const void *data) + { + int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(tb->tb_path); + int n_pos_in_item; +@@ -2405,7 +2404,7 @@ int fix_nodes(int n_op_mode, struct tree + goto repeat; + + n_ret_value = check_balance(n_op_mode, tb, n_h, n_item_num, +- n_pos_in_item, p_s_ins_ih, data); ++ n_pos_in_item, ins_ih, data); + if (n_ret_value != CARRY_ON) { + if (n_ret_value == NO_BALANCING_NEEDED) { + /* No balancing for higher levels needed. 
*/ +--- a/fs/reiserfs/stree.c ++++ b/fs/reiserfs/stree.c +@@ -68,10 +68,10 @@ inline int B_IS_IN_TREE(const struct buf + // + // to gets item head in le form + // +-inline void copy_item_head(struct item_head *p_v_to, +- const struct item_head *p_v_from) ++inline void copy_item_head(struct item_head *to, ++ const struct item_head *from) + { +- memcpy(p_v_to, p_v_from, IH_SIZE); ++ memcpy(to, from, IH_SIZE); + } + + /* k1 is pointer to on-disk structure which is stored in little-endian +@@ -135,15 +135,15 @@ static inline int comp_keys(const struct + inline int comp_short_le_keys(const struct reiserfs_key *key1, + const struct reiserfs_key *key2) + { +- __u32 *p_s_1_u32, *p_s_2_u32; ++ __u32 *k1_u32, *k2_u32; + int n_key_length = REISERFS_SHORT_KEY_LEN; + +- p_s_1_u32 = (__u32 *) key1; +- p_s_2_u32 = (__u32 *) key2; +- for (; n_key_length--; ++p_s_1_u32, ++p_s_2_u32) { +- if (le32_to_cpu(*p_s_1_u32) < le32_to_cpu(*p_s_2_u32)) ++ k1_u32 = (__u32 *) key1; ++ k2_u32 = (__u32 *) key2; ++ for (; n_key_length--; ++k1_u32, ++k2_u32) { ++ if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32)) + return -1; +- if (le32_to_cpu(*p_s_1_u32) > le32_to_cpu(*p_s_2_u32)) ++ if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32)) + return 1; + } + return 0; +@@ -174,8 +174,8 @@ inline int comp_le_keys(const struct rei + * Binary search toolkit function * + * Search for an item in the array by the item key * + * Returns: 1 if found, 0 if not found; * +- * *p_n_pos = number of the searched element if found, else the * +- * number of the first element that is larger than p_v_key. * ++ * *pos = number of the searched element if found, else the * ++ * number of the first element that is larger than key. * + **************************************************************************/ + /* For those not familiar with binary search: n_lbound is the leftmost item that it + could be, n_rbound the rightmost item that it could be. 
We examine the item +@@ -184,28 +184,28 @@ inline int comp_le_keys(const struct rei + there are no possible items, and we have not found it. With each examination we + cut the number of possible items it could be by one more than half rounded down, + or we find it. */ +-static inline int bin_search(const void *p_v_key, /* Key to search for. */ +- const void *p_v_base, /* First item in the array. */ +- int p_n_num, /* Number of items in the array. */ +- int p_n_width, /* Item size in the array. +- searched. Lest the reader be +- confused, note that this is crafted +- as a general function, and when it +- is applied specifically to the array +- of item headers in a node, p_n_width +- is actually the item header size not +- the item size. */ +- int *p_n_pos /* Number of the searched for element. */ ++static inline int bin_search(const void *key, /* Key to search for. */ ++ const void *base, /* First item in the array. */ ++ int num, /* Number of items in the array. */ ++ int width, /* Item size in the array. ++ searched. Lest the reader be ++ confused, note that this is crafted ++ as a general function, and when it ++ is applied specifically to the array ++ of item headers in a node, width ++ is actually the item header size not ++ the item size. */ ++ int *pos /* Number of the searched for element. */ + ) + { + int n_rbound, n_lbound, n_j; + +- for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2; ++ for (n_j = ((n_rbound = num - 1) + (n_lbound = 0)) / 2; + n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2) + switch (comp_keys +- ((struct reiserfs_key *)((char *)p_v_base + +- n_j * p_n_width), +- (struct cpu_key *)p_v_key)) { ++ ((struct reiserfs_key *)((char *)base + ++ n_j * width), ++ (struct cpu_key *)key)) { + case -1: + n_lbound = n_j + 1; + continue; +@@ -213,13 +213,13 @@ static inline int bin_search(const void + n_rbound = n_j - 1; + continue; + case 0: +- *p_n_pos = n_j; ++ *pos = n_j; + return ITEM_FOUND; /* Key found in the array. 
*/ + } + + /* bin_search did not find given key, it returns position of key, + that is minimal and greater than the given one. */ +- *p_n_pos = n_lbound; ++ *pos = n_lbound; + return ITEM_NOT_FOUND; + } + +@@ -243,12 +243,12 @@ static const struct reiserfs_key MAX_KEY + the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this + case we return a special key, either MIN_KEY or MAX_KEY. */ + static inline const struct reiserfs_key *get_lkey(const struct treepath +- *p_s_chk_path, ++ *chk_path, + const struct super_block + *sb) + { +- int n_position, n_path_offset = p_s_chk_path->path_length; +- struct buffer_head *p_s_parent; ++ int n_position, n_path_offset = chk_path->path_length; ++ struct buffer_head *parent; + + RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET, + "PAP-5010: invalid offset in the path"); +@@ -257,42 +257,42 @@ static inline const struct reiserfs_key + while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { + + RFALSE(!buffer_uptodate +- (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), ++ (PATH_OFFSET_PBUFFER(chk_path, n_path_offset)), + "PAP-5020: parent is not uptodate"); + + /* Parent at the path is not in the tree now. */ + if (!B_IS_IN_TREE +- (p_s_parent = +- PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset))) ++ (parent = ++ PATH_OFFSET_PBUFFER(chk_path, n_path_offset))) + return &MAX_KEY; + /* Check whether position in the parent is correct. */ + if ((n_position = +- PATH_OFFSET_POSITION(p_s_chk_path, ++ PATH_OFFSET_POSITION(chk_path, + n_path_offset)) > +- B_NR_ITEMS(p_s_parent)) ++ B_NR_ITEMS(parent)) + return &MAX_KEY; + /* Check whether parent at the path really points to the child. */ +- if (B_N_CHILD_NUM(p_s_parent, n_position) != +- PATH_OFFSET_PBUFFER(p_s_chk_path, ++ if (B_N_CHILD_NUM(parent, n_position) != ++ PATH_OFFSET_PBUFFER(chk_path, + n_path_offset + 1)->b_blocknr) + return &MAX_KEY; + /* Return delimiting key if position in the parent is not equal to zero. 
*/ + if (n_position) +- return B_N_PDELIM_KEY(p_s_parent, n_position - 1); ++ return B_N_PDELIM_KEY(parent, n_position - 1); + } + /* Return MIN_KEY if we are in the root of the buffer tree. */ +- if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)-> ++ if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> + b_blocknr == SB_ROOT_BLOCK(sb)) + return &MIN_KEY; + return &MAX_KEY; + } + + /* Get delimiting key of the buffer at the path and its right neighbor. */ +-inline const struct reiserfs_key *get_rkey(const struct treepath *p_s_chk_path, ++inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path, + const struct super_block *sb) + { +- int n_position, n_path_offset = p_s_chk_path->path_length; +- struct buffer_head *p_s_parent; ++ int n_position, n_path_offset = chk_path->path_length; ++ struct buffer_head *parent; + + RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET, + "PAP-5030: invalid offset in the path"); +@@ -300,31 +300,31 @@ inline const struct reiserfs_key *get_rk + while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { + + RFALSE(!buffer_uptodate +- (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), ++ (PATH_OFFSET_PBUFFER(chk_path, n_path_offset)), + "PAP-5040: parent is not uptodate"); + + /* Parent at the path is not in the tree now. */ + if (!B_IS_IN_TREE +- (p_s_parent = +- PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset))) ++ (parent = ++ PATH_OFFSET_PBUFFER(chk_path, n_path_offset))) + return &MIN_KEY; + /* Check whether position in the parent is correct. */ + if ((n_position = +- PATH_OFFSET_POSITION(p_s_chk_path, ++ PATH_OFFSET_POSITION(chk_path, + n_path_offset)) > +- B_NR_ITEMS(p_s_parent)) ++ B_NR_ITEMS(parent)) + return &MIN_KEY; + /* Check whether parent at the path really points to the child. 
*/ +- if (B_N_CHILD_NUM(p_s_parent, n_position) != +- PATH_OFFSET_PBUFFER(p_s_chk_path, ++ if (B_N_CHILD_NUM(parent, n_position) != ++ PATH_OFFSET_PBUFFER(chk_path, + n_path_offset + 1)->b_blocknr) + return &MIN_KEY; + /* Return delimiting key if position in the parent is not the last one. */ +- if (n_position != B_NR_ITEMS(p_s_parent)) +- return B_N_PDELIM_KEY(p_s_parent, n_position); ++ if (n_position != B_NR_ITEMS(parent)) ++ return B_N_PDELIM_KEY(parent, n_position); + } + /* Return MAX_KEY if we are in the root of the buffer tree. */ +- if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)-> ++ if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> + b_blocknr == SB_ROOT_BLOCK(sb)) + return &MAX_KEY; + return &MIN_KEY; +@@ -335,25 +335,25 @@ inline const struct reiserfs_key *get_rk + the path. These delimiting keys are stored at least one level above that buffer in the tree. If the + buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in + this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ +-static inline int key_in_buffer(struct treepath *p_s_chk_path, /* Path which should be checked. */ +- const struct cpu_key *p_s_key, /* Key which should be checked. */ +- struct super_block *sb /* Super block pointer. */ ++static inline int key_in_buffer(struct treepath *chk_path, /* Path which should be checked. */ ++ const struct cpu_key *key, /* Key which should be checked. 
*/ ++ struct super_block *sb + ) + { + +- RFALSE(!p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET +- || p_s_chk_path->path_length > MAX_HEIGHT, ++ RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET ++ || chk_path->path_length > MAX_HEIGHT, + "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)", +- p_s_key, p_s_chk_path->path_length); +- RFALSE(!PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev, ++ key, chk_path->path_length); ++ RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev, + "PAP-5060: device must not be NODEV"); + +- if (comp_keys(get_lkey(p_s_chk_path, sb), p_s_key) == 1) ++ if (comp_keys(get_lkey(chk_path, sb), key) == 1) + /* left delimiting key is bigger, that the key we look for */ + return 0; +- // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, sb)) != -1 ) +- if (comp_keys(get_rkey(p_s_chk_path, sb), p_s_key) != 1) +- /* p_s_key must be less than right delimitiing key */ ++ /* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */ ++ if (comp_keys(get_rkey(chk_path, sb), key) != 1) ++ /* key must be less than right delimitiing key */ + return 0; + return 1; + } +@@ -369,34 +369,34 @@ int reiserfs_check_path(struct treepath + * dirty bits clean when preparing the buffer for the log. 
+ * This version should only be called from fix_nodes() */ + void pathrelse_and_restore(struct super_block *sb, +- struct treepath *p_s_search_path) ++ struct treepath *search_path) + { +- int n_path_offset = p_s_search_path->path_length; ++ int n_path_offset = search_path->path_length; + + RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, + "clm-4000: invalid path offset"); + + while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { + struct buffer_head *bh; +- bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--); ++ bh = PATH_OFFSET_PBUFFER(search_path, n_path_offset--); + reiserfs_restore_prepared_buffer(sb, bh); + brelse(bh); + } +- p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; ++ search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; + } + + /* Drop the reference to each buffer in a path */ +-void pathrelse(struct treepath *p_s_search_path) ++void pathrelse(struct treepath *search_path) + { +- int n_path_offset = p_s_search_path->path_length; ++ int n_path_offset = search_path->path_length; + + RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, + "PAP-5090: invalid path offset"); + + while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) +- brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); ++ brelse(PATH_OFFSET_PBUFFER(search_path, n_path_offset--)); + +- p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; ++ search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; + } + + static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) +@@ -547,9 +547,9 @@ static void search_by_key_reada(struct s + * Algorithm SearchByKey * + * look for item in the Disk S+Tree by its key * + * Input: sb - super block * +- * p_s_key - pointer to the key to search * ++ * key - pointer to the key to search * + * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR * +- * p_s_search_path - path from the root to the needed leaf * ++ * search_path - path from the root to the needed leaf * + 
**************************************************************************/ + + /* This function fills up the path from the root to the leaf as it +@@ -566,8 +566,8 @@ static void search_by_key_reada(struct s + correctness of the top of the path but need not be checked for the + correctness of the bottom of the path */ + /* The function is NOT SCHEDULE-SAFE! */ +-int search_by_key(struct super_block *sb, const struct cpu_key *p_s_key, /* Key to search. */ +- struct treepath *p_s_search_path,/* This structure was ++int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to search. */ ++ struct treepath *search_path,/* This structure was + allocated and initialized + by the calling + function. It is filled up +@@ -580,7 +580,7 @@ int search_by_key(struct super_block *sb + b_blocknr_t n_block_number; + int expected_level; + struct buffer_head *bh; +- struct path_element *p_s_last_element; ++ struct path_element *last_element; + int n_node_level, n_retval; + int right_neighbor_of_leaf_node; + int fs_gen; +@@ -598,7 +598,7 @@ int search_by_key(struct super_block *sb + we must be careful to release all nodes in a path before we either + discard the path struct or re-use the path struct, as we do here. */ + +- pathrelse(p_s_search_path); ++ pathrelse(search_path); + + right_neighbor_of_leaf_node = 0; + +@@ -615,18 +615,18 @@ int search_by_key(struct super_block *sb + "%s: there were %d iterations of " + "while loop looking for key %K", + current->comm, n_repeat_counter, +- p_s_key); ++ key); + #endif + + /* prep path to have another element added to it. */ +- p_s_last_element = +- PATH_OFFSET_PELEMENT(p_s_search_path, +- ++p_s_search_path->path_length); ++ last_element = ++ PATH_OFFSET_PELEMENT(search_path, ++ ++search_path->path_length); + fs_gen = get_generation(sb); + + /* Read the next tree node, and set the last element in the path to + have a pointer to it. 
*/ +- if ((bh = p_s_last_element->pe_buffer = ++ if ((bh = last_element->pe_buffer = + sb_getblk(sb, n_block_number))) { + if (!buffer_uptodate(bh) && reada_count > 1) + search_by_key_reada(sb, reada_bh, +@@ -637,8 +637,8 @@ int search_by_key(struct super_block *sb + goto io_error; + } else { + io_error: +- p_s_search_path->path_length--; +- pathrelse(p_s_search_path); ++ search_path->path_length--; ++ pathrelse(search_path); + return IO_ERROR; + } + reada_count = 0; +@@ -652,12 +652,12 @@ int search_by_key(struct super_block *sb + if (fs_changed(fs_gen, sb) && + (!B_IS_IN_TREE(bh) || + B_LEVEL(bh) != expected_level || +- !key_in_buffer(p_s_search_path, p_s_key, sb))) { ++ !key_in_buffer(search_path, key, sb))) { + PROC_INFO_INC(sb, search_by_key_fs_changed); + PROC_INFO_INC(sb, search_by_key_restarted); + PROC_INFO_INC(sb, + sbk_restarted[expected_level - 1]); +- pathrelse(p_s_search_path); ++ pathrelse(search_path); + + /* Get the root block number so that we can repeat the search + starting from the root. */ +@@ -669,11 +669,11 @@ int search_by_key(struct super_block *sb + continue; + } + +- /* only check that the key is in the buffer if p_s_key is not ++ /* only check that the key is in the buffer if key is not + equal to the MAX_KEY. Latter case is only possible in + "finish_unfinished()" processing during mount. */ +- RFALSE(comp_keys(&MAX_KEY, p_s_key) && +- !key_in_buffer(p_s_search_path, p_s_key, sb), ++ RFALSE(comp_keys(&MAX_KEY, key) && ++ !key_in_buffer(search_path, key, sb), + "PAP-5130: key is not in the buffer"); + #ifdef CONFIG_REISERFS_CHECK + if (cur_tb) { +@@ -689,7 +689,7 @@ int search_by_key(struct super_block *sb + reiserfs_error(sb, "vs-5150", + "invalid format found in block %ld. 
" + "Fsck?", bh->b_blocknr); +- pathrelse(p_s_search_path); ++ pathrelse(search_path); + return IO_ERROR; + } + +@@ -702,12 +702,12 @@ int search_by_key(struct super_block *sb + "vs-5152: tree level (%d) is less than stop level (%d)", + n_node_level, n_stop_level); + +- n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(bh, 0), ++ n_retval = bin_search(key, B_N_PITEM_HEAD(bh, 0), + B_NR_ITEMS(bh), + (n_node_level == + DISK_LEAF_NODE_LEVEL) ? IH_SIZE : + KEY_SIZE, +- &(p_s_last_element->pe_position)); ++ &(last_element->pe_position)); + if (n_node_level == n_stop_level) { + return n_retval; + } +@@ -715,7 +715,7 @@ int search_by_key(struct super_block *sb + /* we are not in the stop level */ + if (n_retval == ITEM_FOUND) + /* item has been found, so we choose the pointer which is to the right of the found one */ +- p_s_last_element->pe_position++; ++ last_element->pe_position++; + + /* if item was not found we choose the position which is to + the left of the found item. This requires no code, +@@ -725,23 +725,23 @@ int search_by_key(struct super_block *sb + an internal node. Now we calculate child block number by + position in the node. 
*/ + n_block_number = +- B_N_CHILD_NUM(bh, p_s_last_element->pe_position); ++ B_N_CHILD_NUM(bh, last_element->pe_position); + + /* if we are going to read leaf nodes, try for read ahead as well */ +- if ((p_s_search_path->reada & PATH_READA) && ++ if ((search_path->reada & PATH_READA) && + n_node_level == DISK_LEAF_NODE_LEVEL + 1) { +- int pos = p_s_last_element->pe_position; ++ int pos = last_element->pe_position; + int limit = B_NR_ITEMS(bh); + struct reiserfs_key *le_key; + +- if (p_s_search_path->reada & PATH_READA_BACK) ++ if (search_path->reada & PATH_READA_BACK) + limit = 0; + while (reada_count < SEARCH_BY_KEY_READA) { + if (pos == limit) + break; + reada_blocks[reada_count++] = + B_N_CHILD_NUM(bh, pos); +- if (p_s_search_path->reada & PATH_READA_BACK) ++ if (search_path->reada & PATH_READA_BACK) + pos--; + else + pos++; +@@ -751,7 +751,7 @@ int search_by_key(struct super_block *sb + */ + le_key = B_N_PDELIM_KEY(bh, pos); + if (le32_to_cpu(le_key->k_objectid) != +- p_s_key->on_disk_key.k_objectid) { ++ key->on_disk_key.k_objectid) { + break; + } + } +@@ -760,11 +760,11 @@ int search_by_key(struct super_block *sb + } + + /* Form the path to an item and position in this item which contains +- file byte defined by p_s_key. If there is no such item ++ file byte defined by key. If there is no such item + corresponding to the key, we point the path to the item with +- maximal key less than p_s_key, and *p_n_pos_in_item is set to one ++ maximal key less than key, and *pos_in_item is set to one + past the last entry/byte in the item. If searching for entry in a +- directory item, and it is not found, *p_n_pos_in_item is set to one ++ directory item, and it is not found, *pos_in_item is set to one + entry more than the entry with maximal key which is less than the + sought key. + +@@ -777,7 +777,7 @@ int search_by_key(struct super_block *sb + /* The function is NOT SCHEDULE-SAFE! 
*/ + int search_for_position_by_key(struct super_block *sb, /* Pointer to the super block. */ + const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */ +- struct treepath *p_s_search_path /* Filled up by this function. */ ++ struct treepath *search_path /* Filled up by this function. */ + ) + { + struct item_head *p_le_ih; /* pointer to on-disk structure */ +@@ -788,34 +788,34 @@ int search_for_position_by_key(struct su + + /* If searching for directory entry. */ + if (is_direntry_cpu_key(p_cpu_key)) +- return search_by_entry_key(sb, p_cpu_key, p_s_search_path, ++ return search_by_entry_key(sb, p_cpu_key, search_path, + &de); + + /* If not searching for directory entry. */ + + /* If item is found. */ +- retval = search_item(sb, p_cpu_key, p_s_search_path); ++ retval = search_item(sb, p_cpu_key, search_path); + if (retval == IO_ERROR) + return retval; + if (retval == ITEM_FOUND) { + + RFALSE(!ih_item_len + (B_N_PITEM_HEAD +- (PATH_PLAST_BUFFER(p_s_search_path), +- PATH_LAST_POSITION(p_s_search_path))), ++ (PATH_PLAST_BUFFER(search_path), ++ PATH_LAST_POSITION(search_path))), + "PAP-5165: item length equals zero"); + +- pos_in_item(p_s_search_path) = 0; ++ pos_in_item(search_path) = 0; + return POSITION_FOUND; + } + +- RFALSE(!PATH_LAST_POSITION(p_s_search_path), ++ RFALSE(!PATH_LAST_POSITION(search_path), + "PAP-5170: position equals zero"); + + /* Item is not found. Set path to the previous item. */ + p_le_ih = +- B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), +- --PATH_LAST_POSITION(p_s_search_path)); ++ B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path), ++ --PATH_LAST_POSITION(search_path)); + n_blk_size = sb->s_blocksize; + + if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) { +@@ -829,9 +829,9 @@ int search_for_position_by_key(struct su + /* Needed byte is contained in the item pointed to by the path. 
*/ + if (item_offset <= offset && + item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) { +- pos_in_item(p_s_search_path) = offset - item_offset; ++ pos_in_item(search_path) = offset - item_offset; + if (is_indirect_le_ih(p_le_ih)) { +- pos_in_item(p_s_search_path) /= n_blk_size; ++ pos_in_item(search_path) /= n_blk_size; + } + return POSITION_FOUND; + } +@@ -839,18 +839,18 @@ int search_for_position_by_key(struct su + /* Needed byte is not contained in the item pointed to by the + path. Set pos_in_item out of the item. */ + if (is_indirect_le_ih(p_le_ih)) +- pos_in_item(p_s_search_path) = ++ pos_in_item(search_path) = + ih_item_len(p_le_ih) / UNFM_P_SIZE; + else +- pos_in_item(p_s_search_path) = ih_item_len(p_le_ih); ++ pos_in_item(search_path) = ih_item_len(p_le_ih); + + return POSITION_NOT_FOUND; + } + + /* Compare given item and item pointed to by the path. */ +-int comp_items(const struct item_head *stored_ih, const struct treepath *p_s_path) ++int comp_items(const struct item_head *stored_ih, const struct treepath *path) + { +- struct buffer_head *bh = PATH_PLAST_BUFFER(p_s_path); ++ struct buffer_head *bh = PATH_PLAST_BUFFER(path); + struct item_head *ih; + + /* Last buffer at the path is not in the tree. */ +@@ -858,11 +858,11 @@ int comp_items(const struct item_head *s + return 1; + + /* Last path position is invalid. */ +- if (PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(bh)) ++ if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh)) + return 1; + + /* we need only to know, whether it is the same item */ +- ih = get_ih(p_s_path); ++ ih = get_ih(path); + return memcmp(stored_ih, ih, IH_SIZE); + } + +@@ -951,14 +951,14 @@ static inline int prepare_for_direntry_i + In case of file truncate calculate whether this item must be deleted/truncated or last + unformatted node of this item will be converted to a direct item. + This function returns a determination of what balance mode the calling function should employ. 
*/ +-static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *p_s_path, const struct cpu_key *p_s_item_key, int *p_n_removed, /* Number of unformatted nodes which were removed ++static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed, /* Number of unformatted nodes which were removed + from end of the file. */ +- int *p_n_cut_size, unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */ ++ int *cut_size, unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */ + ) + { + struct super_block *sb = inode->i_sb; +- struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_path); +- struct buffer_head *bh = PATH_PLAST_BUFFER(p_s_path); ++ struct item_head *p_le_ih = PATH_PITEM_HEAD(path); ++ struct buffer_head *bh = PATH_PLAST_BUFFER(path); + + BUG_ON(!th->t_trans_id); + +@@ -968,20 +968,20 @@ static char prepare_for_delete_or_cut(st + RFALSE(n_new_file_length != max_reiserfs_offset(inode), + "PAP-5210: mode must be M_DELETE"); + +- *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih)); ++ *cut_size = -(IH_SIZE + ih_item_len(p_le_ih)); + return M_DELETE; + } + + /* Directory item. */ + if (is_direntry_le_ih(p_le_ih)) +- return prepare_for_direntry_item(p_s_path, p_le_ih, inode, ++ return prepare_for_direntry_item(path, p_le_ih, inode, + n_new_file_length, +- p_n_cut_size); ++ cut_size); + + /* Direct item. */ + if (is_direct_le_ih(p_le_ih)) +- return prepare_for_direct_item(p_s_path, p_le_ih, inode, +- n_new_file_length, p_n_cut_size); ++ return prepare_for_direct_item(path, p_le_ih, inode, ++ n_new_file_length, cut_size); + + /* Case of an indirect item. 
*/ + { +@@ -1001,9 +1001,9 @@ static char prepare_for_delete_or_cut(st + + do { + need_re_search = 0; +- *p_n_cut_size = 0; +- bh = PATH_PLAST_BUFFER(p_s_path); +- copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); ++ *cut_size = 0; ++ bh = PATH_PLAST_BUFFER(path); ++ copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); + pos = I_UNFM_NUM(&s_ih); + + while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > n_new_file_length) { +@@ -1013,10 +1013,9 @@ static char prepare_for_delete_or_cut(st + /* Each unformatted block deletion may involve one additional + * bitmap block into the transaction, thereby the initial + * journal space reservation might not be enough. */ +- if (!delete && (*p_n_cut_size) != 0 && +- reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { ++ if (!delete && (*cut_size) != 0 && ++ reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) + break; +- } + + unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1; + block = get_block_num(unfm, 0); +@@ -1030,17 +1029,17 @@ static char prepare_for_delete_or_cut(st + + cond_resched(); + +- if (item_moved (&s_ih, p_s_path)) { ++ if (item_moved (&s_ih, path)) { + need_re_search = 1; + break; + } + + pos --; +- (*p_n_removed) ++; +- (*p_n_cut_size) -= UNFM_P_SIZE; ++ (*removed)++; ++ (*cut_size) -= UNFM_P_SIZE; + + if (pos == 0) { +- (*p_n_cut_size) -= IH_SIZE; ++ (*cut_size) -= IH_SIZE; + result = M_DELETE; + break; + } +@@ -1050,10 +1049,10 @@ static char prepare_for_delete_or_cut(st + ** buffer */ + reiserfs_restore_prepared_buffer(sb, bh); + } while (need_re_search && +- search_for_position_by_key(sb, p_s_item_key, p_s_path) == POSITION_FOUND); +- pos_in_item(p_s_path) = pos * UNFM_P_SIZE; ++ search_for_position_by_key(sb, item_key, path) == POSITION_FOUND); ++ pos_in_item(path) = pos * UNFM_P_SIZE; + +- if (*p_n_cut_size == 0) { ++ if (*cut_size == 0) { + /* Nothing were cut. maybe convert last unformatted node to the + * direct item? 
*/ + result = M_CONVERT; +@@ -1091,7 +1090,7 @@ static int calc_deleted_bytes_number(str + static void init_tb_struct(struct reiserfs_transaction_handle *th, + struct tree_balance *tb, + struct super_block *sb, +- struct treepath *p_s_path, int n_size) ++ struct treepath *path, int n_size) + { + + BUG_ON(!th->t_trans_id); +@@ -1099,9 +1098,9 @@ static void init_tb_struct(struct reiser + memset(tb, '\0', sizeof(struct tree_balance)); + tb->transaction_handle = th; + tb->tb_sb = sb; +- tb->tb_path = p_s_path; +- PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; +- PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; ++ tb->tb_path = path; ++ PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; ++ PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; + tb->insert_size[0] = n_size; + } + +@@ -1141,13 +1140,17 @@ char head2type(struct item_head *ih) + } + #endif + +-/* Delete object item. */ +-int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_path, /* Path to the deleted item. */ +- const struct cpu_key *p_s_item_key, /* Key to search for the deleted item. */ +- struct inode *inode, /* inode is here just to update +- * i_blocks and quotas */ +- struct buffer_head *p_s_un_bh) +-{ /* NULL or unformatted node pointer. */ ++/* Delete object item. 
++ * th - active transaction handle ++ * path - path to the deleted item ++ * item_key - key to search for the deleted item ++ * indode - used for updating i_blocks and quotas ++ * un_bh - NULL or unformatted node pointer ++ */ ++int reiserfs_delete_item(struct reiserfs_transaction_handle *th, ++ struct treepath *path, const struct cpu_key *item_key, ++ struct inode *inode, struct buffer_head *un_bh) ++{ + struct super_block *sb = inode->i_sb; + struct tree_balance s_del_balance; + struct item_head s_ih; +@@ -1162,7 +1165,7 @@ int reiserfs_delete_item(struct reiserfs + + BUG_ON(!th->t_trans_id); + +- init_tb_struct(th, &s_del_balance, sb, p_s_path, ++ init_tb_struct(th, &s_del_balance, sb, path, + 0 /*size is unknown */ ); + + while (1) { +@@ -1172,14 +1175,14 @@ int reiserfs_delete_item(struct reiserfs + n_iter++; + c_mode = + #endif +- prepare_for_delete_or_cut(th, inode, p_s_path, +- p_s_item_key, &n_removed, ++ prepare_for_delete_or_cut(th, inode, path, ++ item_key, &n_removed, + &n_del_size, + max_reiserfs_offset(inode)); + + RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); + +- copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); ++ copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); + s_del_balance.insert_size[0] = n_del_size; + + n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); +@@ -1190,13 +1193,13 @@ int reiserfs_delete_item(struct reiserfs + + // file system changed, repeat search + n_ret_value = +- search_for_position_by_key(sb, p_s_item_key, p_s_path); ++ search_for_position_by_key(sb, item_key, path); + if (n_ret_value == IO_ERROR) + break; + if (n_ret_value == FILE_NOT_FOUND) { + reiserfs_warning(sb, "vs-5340", + "no items of the file %K found", +- p_s_item_key); ++ item_key); + break; + } + } /* while (1) */ +@@ -1207,7 +1210,7 @@ int reiserfs_delete_item(struct reiserfs + } + // reiserfs_delete_item returns item length when success + n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); +- q_ih = get_ih(p_s_path); ++ 
q_ih = get_ih(path); + quota_cut_bytes = ih_item_len(q_ih); + + /* hack so the quota code doesn't have to guess if the file +@@ -1224,7 +1227,7 @@ int reiserfs_delete_item(struct reiserfs + } + } + +- if (p_s_un_bh) { ++ if (un_bh) { + int off; + char *data; + +@@ -1242,16 +1245,16 @@ int reiserfs_delete_item(struct reiserfs + ** The unformatted node must be dirtied later on. We can't be + ** sure here if the entire tail has been deleted yet. + ** +- ** p_s_un_bh is from the page cache (all unformatted nodes are ++ ** un_bh is from the page cache (all unformatted nodes are + ** from the page cache) and might be a highmem page. So, we +- ** can't use p_s_un_bh->b_data. ++ ** can't use un_bh->b_data. + ** -clm + */ + +- data = kmap_atomic(p_s_un_bh->b_page, KM_USER0); ++ data = kmap_atomic(un_bh->b_page, KM_USER0); + off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); + memcpy(data + off, +- B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), ++ B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih), + n_ret_value); + kunmap_atomic(data, KM_USER0); + } +@@ -1427,9 +1430,9 @@ static void unmap_buffers(struct page *p + static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, + struct inode *inode, + struct page *page, +- struct treepath *p_s_path, +- const struct cpu_key *p_s_item_key, +- loff_t n_new_file_size, char *p_c_mode) ++ struct treepath *path, ++ const struct cpu_key *item_key, ++ loff_t n_new_file_size, char *mode) + { + struct super_block *sb = inode->i_sb; + int n_block_size = sb->s_blocksize; +@@ -1445,17 +1448,17 @@ static int maybe_indirect_to_direct(stru + !tail_has_to_be_packed(inode) || + !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) { + /* leave tail in an unformatted node */ +- *p_c_mode = M_SKIP_BALANCING; ++ *mode = M_SKIP_BALANCING; + cut_bytes = + n_block_size - (n_new_file_size & (n_block_size - 1)); +- pathrelse(p_s_path); ++ pathrelse(path); + return cut_bytes; + } +- /* Permorm the conversion to a direct_item. 
*/ +- /* return indirect_to_direct(inode, p_s_path, p_s_item_key, +- n_new_file_size, p_c_mode); */ +- return indirect2direct(th, inode, page, p_s_path, p_s_item_key, +- n_new_file_size, p_c_mode); ++ /* Perform the conversion to a direct_item. */ ++ /* return indirect_to_direct(inode, path, item_key, ++ n_new_file_size, mode); */ ++ return indirect2direct(th, inode, page, path, item_key, ++ n_new_file_size, mode); + } + + /* we did indirect_to_direct conversion. And we have inserted direct +@@ -1506,8 +1509,8 @@ static void indirect_to_direct_roll_back + + /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ + int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, +- struct treepath *p_s_path, +- struct cpu_key *p_s_item_key, ++ struct treepath *path, ++ struct cpu_key *item_key, + struct inode *inode, + struct page *page, loff_t n_new_file_size) + { +@@ -1528,7 +1531,7 @@ int reiserfs_cut_from_item(struct reiser + + BUG_ON(!th->t_trans_id); + +- init_tb_struct(th, &s_cut_balance, inode->i_sb, p_s_path, ++ init_tb_struct(th, &s_cut_balance, inode->i_sb, path, + n_cut_size); + + /* Repeat this loop until we either cut the item without needing +@@ -1540,8 +1543,8 @@ int reiserfs_cut_from_item(struct reiser + pointers. */ + + c_mode = +- prepare_for_delete_or_cut(th, inode, p_s_path, +- p_s_item_key, &n_removed, ++ prepare_for_delete_or_cut(th, inode, path, ++ item_key, &n_removed, + &n_cut_size, n_new_file_size); + if (c_mode == M_CONVERT) { + /* convert last unformatted node to direct item or leave +@@ -1551,7 +1554,7 @@ int reiserfs_cut_from_item(struct reiser + + n_ret_value = + maybe_indirect_to_direct(th, inode, page, +- p_s_path, p_s_item_key, ++ path, item_key, + n_new_file_size, &c_mode); + if (c_mode == M_SKIP_BALANCING) + /* tail has been left in the unformatted node */ +@@ -1568,26 +1571,26 @@ int reiserfs_cut_from_item(struct reiser + inserting the new direct item. 
Now we are removing the + last unformatted node pointer. Set key to search for + it. */ +- set_cpu_key_k_type(p_s_item_key, TYPE_INDIRECT); +- p_s_item_key->key_length = 4; ++ set_cpu_key_k_type(item_key, TYPE_INDIRECT); ++ item_key->key_length = 4; + n_new_file_size -= + (n_new_file_size & (sb->s_blocksize - 1)); + tail_pos = n_new_file_size; +- set_cpu_key_k_offset(p_s_item_key, n_new_file_size + 1); ++ set_cpu_key_k_offset(item_key, n_new_file_size + 1); + if (search_for_position_by_key +- (sb, p_s_item_key, +- p_s_path) == POSITION_NOT_FOUND) { +- print_block(PATH_PLAST_BUFFER(p_s_path), 3, +- PATH_LAST_POSITION(p_s_path) - 1, +- PATH_LAST_POSITION(p_s_path) + 1); ++ (sb, item_key, ++ path) == POSITION_NOT_FOUND) { ++ print_block(PATH_PLAST_BUFFER(path), 3, ++ PATH_LAST_POSITION(path) - 1, ++ PATH_LAST_POSITION(path) + 1); + reiserfs_panic(sb, "PAP-5580", "item to " + "convert does not exist (%K)", +- p_s_item_key); ++ item_key); + } + continue; + } + if (n_cut_size == 0) { +- pathrelse(p_s_path); ++ pathrelse(path); + return 0; + } + +@@ -1600,12 +1603,12 @@ int reiserfs_cut_from_item(struct reiser + PROC_INFO_INC(sb, cut_from_item_restarted); + + n_ret_value = +- search_for_position_by_key(sb, p_s_item_key, p_s_path); ++ search_for_position_by_key(sb, item_key, path); + if (n_ret_value == POSITION_FOUND) + continue; + + reiserfs_warning(sb, "PAP-5610", "item %K not found", +- p_s_item_key); ++ item_key); + unfix_nodes(&s_cut_balance); + return (n_ret_value == IO_ERROR) ? 
-EIO : -ENOENT; + } /* while */ +@@ -1615,7 +1618,7 @@ int reiserfs_cut_from_item(struct reiser + if (n_is_inode_locked) { + // FIXME: this seems to be not needed: we are always able + // to cut item +- indirect_to_direct_roll_back(th, inode, p_s_path); ++ indirect_to_direct_roll_back(th, inode, path); + } + if (n_ret_value == NO_DISK_SPACE) + reiserfs_warning(sb, "reiserfs-5092", +@@ -1631,7 +1634,7 @@ int reiserfs_cut_from_item(struct reiser + /* Calculate number of bytes that need to be cut from the item. */ + quota_cut_bytes = + (c_mode == +- M_DELETE) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance. ++ M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance. + insert_size[0]; + if (retval2 == -1) + n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode); +@@ -1878,7 +1881,7 @@ int reiserfs_do_truncate(struct reiserfs + #ifdef CONFIG_REISERFS_CHECK + // this makes sure, that we __append__, not overwrite or add holes + static void check_research_for_paste(struct treepath *path, +- const struct cpu_key *p_s_key) ++ const struct cpu_key *key) + { + struct item_head *found_ih = get_ih(path); + +@@ -1886,35 +1889,35 @@ static void check_research_for_paste(str + if (le_ih_k_offset(found_ih) + + op_bytes_number(found_ih, + get_last_bh(path)->b_size) != +- cpu_key_k_offset(p_s_key) ++ cpu_key_k_offset(key) + || op_bytes_number(found_ih, + get_last_bh(path)->b_size) != + pos_in_item(path)) + reiserfs_panic(NULL, "PAP-5720", "found direct item " + "%h or position (%d) does not match " + "to key %K", found_ih, +- pos_in_item(path), p_s_key); ++ pos_in_item(path), key); + } + if (is_indirect_le_ih(found_ih)) { + if (le_ih_k_offset(found_ih) + + op_bytes_number(found_ih, + get_last_bh(path)->b_size) != +- cpu_key_k_offset(p_s_key) ++ cpu_key_k_offset(key) + || I_UNFM_NUM(found_ih) != pos_in_item(path) + || get_ih_free_space(found_ih) != 0) + reiserfs_panic(NULL, "PAP-5730", "found indirect " + "item (%h) or position (%d) does not " + "match to key (%K)", +- 
found_ih, pos_in_item(path), p_s_key); ++ found_ih, pos_in_item(path), key); + } + } + #endif /* config reiserfs check */ + + /* Paste bytes to the existing item. Returns bytes number pasted into the item. */ +-int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_search_path, /* Path to the pasted item. */ +- const struct cpu_key *p_s_key, /* Key to search for the needed item. */ ++int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path, /* Path to the pasted item. */ ++ const struct cpu_key *key, /* Key to search for the needed item. */ + struct inode *inode, /* Inode item belongs to */ +- const char *p_c_body, /* Pointer to the bytes to paste. */ ++ const char *body, /* Pointer to the bytes to paste. */ + int n_pasted_size) + { /* Size of pasted bytes. */ + struct tree_balance s_paste_balance; +@@ -1929,17 +1932,17 @@ int reiserfs_paste_into_item(struct reis + reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, + "reiserquota paste_into_item(): allocating %u id=%u type=%c", + n_pasted_size, inode->i_uid, +- key2type(&(p_s_key->on_disk_key))); ++ key2type(&(key->on_disk_key))); + #endif + + if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) { +- pathrelse(p_s_search_path); ++ pathrelse(search_path); + return -EDQUOT; + } +- init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, ++ init_tb_struct(th, &s_paste_balance, th->t_super, search_path, + n_pasted_size); + #ifdef DISPLACE_NEW_PACKING_LOCALITIES +- s_paste_balance.key = p_s_key->on_disk_key; ++ s_paste_balance.key = key->on_disk_key; + #endif + + /* DQUOT_* can schedule, must check before the fix_nodes */ +@@ -1949,13 +1952,13 @@ int reiserfs_paste_into_item(struct reis + + while ((retval = + fix_nodes(M_PASTE, &s_paste_balance, NULL, +- p_c_body)) == REPEAT_SEARCH) { ++ body)) == REPEAT_SEARCH) { + search_again: + /* file system changed while we were in the fix_nodes */ + PROC_INFO_INC(th->t_super, 
paste_into_item_restarted); + retval = +- search_for_position_by_key(th->t_super, p_s_key, +- p_s_search_path); ++ search_for_position_by_key(th->t_super, key, ++ search_path); + if (retval == IO_ERROR) { + retval = -EIO; + goto error_out; +@@ -1963,19 +1966,19 @@ int reiserfs_paste_into_item(struct reis + if (retval == POSITION_FOUND) { + reiserfs_warning(inode->i_sb, "PAP-5710", + "entry or pasted byte (%K) exists", +- p_s_key); ++ key); + retval = -EEXIST; + goto error_out; + } + #ifdef CONFIG_REISERFS_CHECK +- check_research_for_paste(p_s_search_path, p_s_key); ++ check_research_for_paste(search_path, key); + #endif + } + + /* Perform balancing after all resources are collected by fix_nodes, and + accessing them will not risk triggering schedule. */ + if (retval == CARRY_ON) { +- do_balance(&s_paste_balance, NULL /*ih */ , p_c_body, M_PASTE); ++ do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE); + return 0; + } + retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; +@@ -1986,17 +1989,23 @@ int reiserfs_paste_into_item(struct reis + reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, + "reiserquota paste_into_item(): freeing %u id=%u type=%c", + n_pasted_size, inode->i_uid, +- key2type(&(p_s_key->on_disk_key))); ++ key2type(&(key->on_disk_key))); + #endif + DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); + return retval; + } + +-/* Insert new item into the buffer at the path. */ +-int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_path, /* Path to the inserteded item. */ +- const struct cpu_key *key, struct item_head *p_s_ih, /* Pointer to the item header to insert. */ +- struct inode *inode, const char *p_c_body) +-{ /* Pointer to the bytes to insert. */ ++/* Insert new item into the buffer at the path. 
++ * th - active transaction handle ++ * path - path to the inserted item ++ * ih - pointer to the item header to insert ++ * body - pointer to the bytes to insert ++ */ ++int reiserfs_insert_item(struct reiserfs_transaction_handle *th, ++ struct treepath *path, const struct cpu_key *key, ++ struct item_head *ih, struct inode *inode, ++ const char *body) ++{ + struct tree_balance s_ins_balance; + int retval; + int fs_gen = 0; +@@ -2006,28 +2015,27 @@ int reiserfs_insert_item(struct reiserfs + + if (inode) { /* Do we count quotas for item? */ + fs_gen = get_generation(inode->i_sb); +- quota_bytes = ih_item_len(p_s_ih); ++ quota_bytes = ih_item_len(ih); + + /* hack so the quota code doesn't have to guess if the file has + ** a tail, links are always tails, so there's no guessing needed + */ +- if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_s_ih)) { ++ if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih)) + quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; +- } + #ifdef REISERQUOTA_DEBUG + reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, + "reiserquota insert_item(): allocating %u id=%u type=%c", +- quota_bytes, inode->i_uid, head2type(p_s_ih)); ++ quota_bytes, inode->i_uid, head2type(ih)); + #endif + /* We can't dirty inode here. It would be immediately written but + * appropriate stat item isn't inserted yet... 
*/ + if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) { +- pathrelse(p_s_path); ++ pathrelse(path); + return -EDQUOT; + } + } +- init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, +- IH_SIZE + ih_item_len(p_s_ih)); ++ init_tb_struct(th, &s_ins_balance, th->t_super, path, ++ IH_SIZE + ih_item_len(ih)); + #ifdef DISPLACE_NEW_PACKING_LOCALITIES + s_ins_balance.key = key->on_disk_key; + #endif +@@ -2037,12 +2045,12 @@ int reiserfs_insert_item(struct reiserfs + } + + while ((retval = +- fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, +- p_c_body)) == REPEAT_SEARCH) { ++ fix_nodes(M_INSERT, &s_ins_balance, ih, ++ body)) == REPEAT_SEARCH) { + search_again: + /* file system changed while we were in the fix_nodes */ + PROC_INFO_INC(th->t_super, insert_item_restarted); +- retval = search_item(th->t_super, key, p_s_path); ++ retval = search_item(th->t_super, key, path); + if (retval == IO_ERROR) { + retval = -EIO; + goto error_out; +@@ -2058,7 +2066,7 @@ int reiserfs_insert_item(struct reiserfs + + /* make balancing after all resources will be collected at a time */ + if (retval == CARRY_ON) { +- do_balance(&s_ins_balance, p_s_ih, p_c_body, M_INSERT); ++ do_balance(&s_ins_balance, ih, body, M_INSERT); + return 0; + } + +@@ -2069,7 +2077,7 @@ int reiserfs_insert_item(struct reiserfs + #ifdef REISERQUOTA_DEBUG + reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, + "reiserquota insert_item(): freeing %u id=%u type=%c", +- quota_bytes, inode->i_uid, head2type(p_s_ih)); ++ quota_bytes, inode->i_uid, head2type(ih)); + #endif + if (inode) + DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes); +--- a/fs/reiserfs/tail_conversion.c ++++ b/fs/reiserfs/tail_conversion.c +@@ -172,10 +172,12 @@ void reiserfs_unmap_buffer(struct buffer + inode */ + int indirect2direct(struct reiserfs_transaction_handle *th, + struct inode *inode, struct page *page, +- struct treepath *p_s_path, /* path to the indirect item. 
*/ +- const struct cpu_key *p_s_item_key, /* Key to look for unformatted node pointer to be cut. */ ++ struct treepath *path, /* path to the indirect item. */ ++ const struct cpu_key *item_key, /* Key to look for ++ * unformatted node ++ * pointer to be cut. */ + loff_t n_new_file_size, /* New file size. */ +- char *p_c_mode) ++ char *mode) + { + struct super_block *sb = inode->i_sb; + struct item_head s_ih; +@@ -189,10 +191,10 @@ int indirect2direct(struct reiserfs_tran + + REISERFS_SB(sb)->s_indirect2direct++; + +- *p_c_mode = M_SKIP_BALANCING; ++ *mode = M_SKIP_BALANCING; + + /* store item head path points to. */ +- copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); ++ copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); + + tail_len = (n_new_file_size & (n_block_size - 1)); + if (get_inode_sd_version(inode) == STAT_DATA_V2) +@@ -211,14 +213,14 @@ int indirect2direct(struct reiserfs_tran + + tail = (char *)kmap(page); /* this can schedule */ + +- if (path_changed(&s_ih, p_s_path)) { ++ if (path_changed(&s_ih, path)) { + /* re-search indirect item */ +- if (search_for_position_by_key(sb, p_s_item_key, p_s_path) ++ if (search_for_position_by_key(sb, item_key, path) + == POSITION_NOT_FOUND) + reiserfs_panic(sb, "PAP-5520", + "item to be converted %K does not exist", +- p_s_item_key); +- copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); ++ item_key); ++ copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); + #ifdef CONFIG_REISERFS_CHECK + pos = le_ih_k_offset(&s_ih) - 1 + + (ih_item_len(&s_ih) / UNFM_P_SIZE - +@@ -240,13 +242,13 @@ int indirect2direct(struct reiserfs_tran + */ + tail = tail + (pos & (PAGE_CACHE_SIZE - 1)); + +- PATH_LAST_POSITION(p_s_path)++; ++ PATH_LAST_POSITION(path)++; + +- key = *p_s_item_key; ++ key = *item_key; + set_cpu_key_k_type(&key, TYPE_DIRECT); + key.key_length = 4; + /* Insert tail as new direct item in the tree */ +- if (reiserfs_insert_item(th, p_s_path, &key, &s_ih, inode, ++ if (reiserfs_insert_item(th, path, &key, &s_ih, inode, + tail ? 
tail : NULL) < 0) { + /* No disk memory. So we can not convert last unformatted node + to the direct item. In this case we used to adjust +@@ -268,7 +270,7 @@ int indirect2direct(struct reiserfs_tran + + /* We have inserted new direct item and must remove last + unformatted node. */ +- *p_c_mode = M_CUT; ++ *mode = M_CUT; + + /* we store position of first direct item in the in-core inode */ + /* mark_file_with_tail (inode, pos1 + 1); */ +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -694,9 +694,9 @@ static inline void cpu_key_k_offset_dec( + #define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key))) + #define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key))) + +-#define I_K_KEY_IN_ITEM(p_s_ih, p_s_key, n_blocksize) \ +- ( ! COMP_SHORT_KEYS(p_s_ih, p_s_key) && \ +- I_OFF_BYTE_IN_ITEM(p_s_ih, k_offset (p_s_key), n_blocksize) ) ++#define I_K_KEY_IN_ITEM(ih, key, n_blocksize) \ ++ (!COMP_SHORT_KEYS(ih, key) && \ ++ I_OFF_BYTE_IN_ITEM(ih, k_offset(key), n_blocksize)) + + /* maximal length of item */ + #define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE) +@@ -1196,33 +1196,33 @@ struct treepath { + struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} + + /* Get path element by path and path position. */ +-#define PATH_OFFSET_PELEMENT(p_s_path,n_offset) ((p_s_path)->path_elements +(n_offset)) ++#define PATH_OFFSET_PELEMENT(path, n_offset) ((path)->path_elements + (n_offset)) + + /* Get buffer header at the path by path and path position. */ +-#define PATH_OFFSET_PBUFFER(p_s_path,n_offset) (PATH_OFFSET_PELEMENT(p_s_path,n_offset)->pe_buffer) ++#define PATH_OFFSET_PBUFFER(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_buffer) + + /* Get position in the element at the path by path and path position. 
*/ +-#define PATH_OFFSET_POSITION(p_s_path,n_offset) (PATH_OFFSET_PELEMENT(p_s_path,n_offset)->pe_position) ++#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position) + +-#define PATH_PLAST_BUFFER(p_s_path) (PATH_OFFSET_PBUFFER((p_s_path), (p_s_path)->path_length)) ++#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length)) + /* you know, to the person who didn't + write this the macro name does not + at first suggest what it does. + Maybe POSITION_FROM_PATH_END? Or + maybe we should just focus on + dumping paths... -Hans */ +-#define PATH_LAST_POSITION(p_s_path) (PATH_OFFSET_POSITION((p_s_path), (p_s_path)->path_length)) ++#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length)) + +-#define PATH_PITEM_HEAD(p_s_path) B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_path),PATH_LAST_POSITION(p_s_path)) ++#define PATH_PITEM_HEAD(path) B_N_PITEM_HEAD(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)) + + /* in do_balance leaf has h == 0 in contrast with path structure, + where root has level == 0. 
That is why we need these defines */ +-#define PATH_H_PBUFFER(p_s_path, h) PATH_OFFSET_PBUFFER (p_s_path, p_s_path->path_length - (h)) /* tb->S[h] */ ++#define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER (path, path->path_length - (h)) /* tb->S[h] */ + #define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */ + #define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h)) + #define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) /* tb->S[h]->b_item_order */ + +-#define PATH_H_PATH_OFFSET(p_s_path, n_h) ((p_s_path)->path_length - (n_h)) ++#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h)) + + #define get_last_bh(path) PATH_PLAST_BUFFER(path) + #define get_ih(path) PATH_PITEM_HEAD(path) +@@ -1512,7 +1512,7 @@ extern struct item_operations *item_ops[ + #define COMP_SHORT_KEYS comp_short_keys + + /* number of blocks pointed to by the indirect item */ +-#define I_UNFM_NUM(p_s_ih) ( ih_item_len(p_s_ih) / UNFM_P_SIZE ) ++#define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE) + + /* the used space within the unformatted node corresponding to pos within the item pointed to by ih */ + #define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? 
(size) - ih_free_space(ih) : (size)) +@@ -1793,8 +1793,8 @@ int reiserfs_convert_objectid_map_v1(str + + /* stree.c */ + int B_IS_IN_TREE(const struct buffer_head *); +-extern void copy_item_head(struct item_head *p_v_to, +- const struct item_head *p_v_from); ++extern void copy_item_head(struct item_head *to, ++ const struct item_head *from); + + // first key is in cpu form, second - le + extern int comp_short_keys(const struct reiserfs_key *le_key, +@@ -1829,20 +1829,20 @@ static inline void copy_key(struct reise + memcpy(to, from, KEY_SIZE); + } + +-int comp_items(const struct item_head *stored_ih, const struct treepath *p_s_path); +-const struct reiserfs_key *get_rkey(const struct treepath *p_s_chk_path, ++int comp_items(const struct item_head *stored_ih, const struct treepath *path); ++const struct reiserfs_key *get_rkey(const struct treepath *chk_path, + const struct super_block *sb); + int search_by_key(struct super_block *, const struct cpu_key *, + struct treepath *, int); + #define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL) + int search_for_position_by_key(struct super_block *sb, +- const struct cpu_key *p_s_cpu_key, +- struct treepath *p_s_search_path); ++ const struct cpu_key *cpu_key, ++ struct treepath *search_path); + extern void decrement_bcount(struct buffer_head *bh); +-void decrement_counters_in_path(struct treepath *p_s_search_path); +-void pathrelse(struct treepath *p_s_search_path); ++void decrement_counters_in_path(struct treepath *search_path); ++void pathrelse(struct treepath *search_path); + int reiserfs_check_path(struct treepath *p); +-void pathrelse_and_restore(struct super_block *s, struct treepath *p_s_search_path); ++void pathrelse_and_restore(struct super_block *s, struct treepath *search_path); + + int reiserfs_insert_item(struct reiserfs_transaction_handle *th, + struct treepath *path, +@@ -1865,7 +1865,7 @@ int reiserfs_cut_from_item(struct reiser + int reiserfs_delete_item(struct 
reiserfs_transaction_handle *th, + struct treepath *path, + const struct cpu_key *key, +- struct inode *inode, struct buffer_head *p_s_un_bh); ++ struct inode *inode, struct buffer_head *un_bh); + + void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, + struct inode *inode, struct reiserfs_key *key); +@@ -2005,7 +2005,7 @@ extern const struct address_space_operat + /* fix_nodes.c */ + + int fix_nodes(int n_op_mode, struct tree_balance *tb, +- struct item_head *p_s_ins_ih, const void *); ++ struct item_head *ins_ih, const void *); + void unfix_nodes(struct tree_balance *); + + /* prints.c */ diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_bh.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_bh.diff new file mode 100644 index 0000000000..be108d6f5b --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_bh.diff @@ -0,0 +1,489 @@ +From: Jeff Mahoney +Subject: reiserfs: rename p_s_bh to bh + + This patch is a simple s/p_s_bh/bh/g to the reiserfs code. This is the second + in a series of patches to rip out some of the awful variable naming in + reiserfs. 
+ +Signed-off-by: Jeff Mahoney + +--- + + fs/reiserfs/fix_node.c | 94 ++++++++++++++++++++------------------------ + fs/reiserfs/stree.c | 63 ++++++++++++++--------------- + include/linux/reiserfs_fs.h | 37 ++++++++--------- + 3 files changed, 94 insertions(+), 100 deletions(-) + +--- a/fs/reiserfs/fix_node.c ++++ b/fs/reiserfs/fix_node.c +@@ -1887,7 +1887,7 @@ static int check_balance(int mode, + /* Check whether parent at the path is the really parent of the current node.*/ + static int get_direct_parent(struct tree_balance *p_s_tb, int n_h) + { +- struct buffer_head *p_s_bh; ++ struct buffer_head *bh; + struct treepath *p_s_path = p_s_tb->tb_path; + int n_position, + n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); +@@ -1909,21 +1909,21 @@ static int get_direct_parent(struct tree + } + + if (!B_IS_IN_TREE +- (p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))) ++ (bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))) + return REPEAT_SEARCH; /* Parent in the path is not in the tree. */ + + if ((n_position = + PATH_OFFSET_POSITION(p_s_path, +- n_path_offset - 1)) > B_NR_ITEMS(p_s_bh)) ++ n_path_offset - 1)) > B_NR_ITEMS(bh)) + return REPEAT_SEARCH; + +- if (B_N_CHILD_NUM(p_s_bh, n_position) != ++ if (B_N_CHILD_NUM(bh, n_position) != + PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr) + /* Parent in the path is not parent of the current node in the tree. */ + return REPEAT_SEARCH; + +- if (buffer_locked(p_s_bh)) { +- __wait_on_buffer(p_s_bh); ++ if (buffer_locked(bh)) { ++ __wait_on_buffer(bh); + if (FILESYSTEM_CHANGED_TB(p_s_tb)) + return REPEAT_SEARCH; + } +@@ -1943,29 +1943,29 @@ static int get_neighbors(struct tree_bal + n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1); + unsigned long n_son_number; + struct super_block *sb = p_s_tb->tb_sb; +- struct buffer_head *p_s_bh; ++ struct buffer_head *bh; + + PROC_INFO_INC(sb, get_neighbors[n_h]); + + if (p_s_tb->lnum[n_h]) { + /* We need left neighbor to balance S[n_h]. 
*/ + PROC_INFO_INC(sb, need_l_neighbor[n_h]); +- p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); ++ bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); + +- RFALSE(p_s_bh == p_s_tb->FL[n_h] && ++ RFALSE(bh == p_s_tb->FL[n_h] && + !PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset), + "PAP-8270: invalid position in the parent"); + + n_child_position = +- (p_s_bh == ++ (bh == + p_s_tb->FL[n_h]) ? p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb-> + FL[n_h]); + n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position); +- p_s_bh = sb_bread(sb, n_son_number); +- if (!p_s_bh) ++ bh = sb_bread(sb, n_son_number); ++ if (!bh) + return IO_ERROR; + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { +- brelse(p_s_bh); ++ brelse(bh); + PROC_INFO_INC(sb, get_neighbors_restart[n_h]); + return REPEAT_SEARCH; + } +@@ -1973,48 +1973,48 @@ static int get_neighbors(struct tree_bal + RFALSE(!B_IS_IN_TREE(p_s_tb->FL[n_h]) || + n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) || + B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) != +- p_s_bh->b_blocknr, "PAP-8275: invalid parent"); +- RFALSE(!B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child"); ++ bh->b_blocknr, "PAP-8275: invalid parent"); ++ RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child"); + RFALSE(!n_h && +- B_FREE_SPACE(p_s_bh) != +- MAX_CHILD_SIZE(p_s_bh) - ++ B_FREE_SPACE(bh) != ++ MAX_CHILD_SIZE(bh) - + dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)), + "PAP-8290: invalid child size of left neighbor"); + + brelse(p_s_tb->L[n_h]); +- p_s_tb->L[n_h] = p_s_bh; ++ p_s_tb->L[n_h] = bh; + } + + if (p_s_tb->rnum[n_h]) { /* We need right neighbor to balance S[n_path_offset]. 
*/ + PROC_INFO_INC(sb, need_r_neighbor[n_h]); +- p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); ++ bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); + +- RFALSE(p_s_bh == p_s_tb->FR[n_h] && ++ RFALSE(bh == p_s_tb->FR[n_h] && + PATH_OFFSET_POSITION(p_s_tb->tb_path, + n_path_offset) >= +- B_NR_ITEMS(p_s_bh), ++ B_NR_ITEMS(bh), + "PAP-8295: invalid position in the parent"); + + n_child_position = +- (p_s_bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0; ++ (bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0; + n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position); +- p_s_bh = sb_bread(sb, n_son_number); +- if (!p_s_bh) ++ bh = sb_bread(sb, n_son_number); ++ if (!bh) + return IO_ERROR; + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { +- brelse(p_s_bh); ++ brelse(bh); + PROC_INFO_INC(sb, get_neighbors_restart[n_h]); + return REPEAT_SEARCH; + } + brelse(p_s_tb->R[n_h]); +- p_s_tb->R[n_h] = p_s_bh; ++ p_s_tb->R[n_h] = bh; + + RFALSE(!n_h +- && B_FREE_SPACE(p_s_bh) != +- MAX_CHILD_SIZE(p_s_bh) - ++ && B_FREE_SPACE(bh) != ++ MAX_CHILD_SIZE(bh) - + dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)), + "PAP-8300: invalid child size of right neighbor (%d != %d - %d)", +- B_FREE_SPACE(p_s_bh), MAX_CHILD_SIZE(p_s_bh), ++ B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh), + dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position))); + + } +@@ -2090,51 +2090,45 @@ static int get_mem_for_virtual_node(stru + + #ifdef CONFIG_REISERFS_CHECK + static void tb_buffer_sanity_check(struct super_block *sb, +- struct buffer_head *p_s_bh, ++ struct buffer_head *bh, + const char *descr, int level) + { +- if (p_s_bh) { +- if (atomic_read(&(p_s_bh->b_count)) <= 0) { ++ if (bh) { ++ if (atomic_read(&(bh->b_count)) <= 0) + + reiserfs_panic(sb, "jmacd-1", "negative or zero " + "reference counter for buffer %s[%d] " +- "(%b)", descr, level, p_s_bh); +- } ++ "(%b)", descr, level, bh); + +- if (!buffer_uptodate(p_s_bh)) { ++ if (!buffer_uptodate(bh)) + reiserfs_panic(sb, "jmacd-2", "buffer is 
not up " + "to date %s[%d] (%b)", +- descr, level, p_s_bh); +- } ++ descr, level, bh); + +- if (!B_IS_IN_TREE(p_s_bh)) { ++ if (!B_IS_IN_TREE(bh)) + reiserfs_panic(sb, "jmacd-3", "buffer is not " + "in tree %s[%d] (%b)", +- descr, level, p_s_bh); +- } ++ descr, level, bh); + +- if (p_s_bh->b_bdev != sb->s_bdev) { ++ if (bh->b_bdev != sb->s_bdev) + reiserfs_panic(sb, "jmacd-4", "buffer has wrong " + "device %s[%d] (%b)", +- descr, level, p_s_bh); +- } ++ descr, level, bh); + +- if (p_s_bh->b_size != sb->s_blocksize) { ++ if (bh->b_size != sb->s_blocksize) + reiserfs_panic(sb, "jmacd-5", "buffer has wrong " + "blocksize %s[%d] (%b)", +- descr, level, p_s_bh); +- } ++ descr, level, bh); + +- if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(sb)) { ++ if (bh->b_blocknr > SB_BLOCK_COUNT(sb)) + reiserfs_panic(sb, "jmacd-6", "buffer block " + "number too high %s[%d] (%b)", +- descr, level, p_s_bh); +- } ++ descr, level, bh); + } + } + #else + static void tb_buffer_sanity_check(struct super_block *sb, +- struct buffer_head *p_s_bh, ++ struct buffer_head *bh, + const char *descr, int level) + {; + } +--- a/fs/reiserfs/stree.c ++++ b/fs/reiserfs/stree.c +@@ -56,13 +56,13 @@ + #include + + /* Does the buffer contain a disk block which is in the tree. 
*/ +-inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh) ++inline int B_IS_IN_TREE(const struct buffer_head *bh) + { + +- RFALSE(B_LEVEL(p_s_bh) > MAX_HEIGHT, +- "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh); ++ RFALSE(B_LEVEL(bh) > MAX_HEIGHT, ++ "PAP-1010: block (%b) has too big level (%z)", bh, bh); + +- return (B_LEVEL(p_s_bh) != FREE_LEVEL); ++ return (B_LEVEL(bh) != FREE_LEVEL); + } + + // +@@ -579,7 +579,7 @@ int search_by_key(struct super_block *sb + { + b_blocknr_t n_block_number; + int expected_level; +- struct buffer_head *p_s_bh; ++ struct buffer_head *bh; + struct path_element *p_s_last_element; + int n_node_level, n_retval; + int right_neighbor_of_leaf_node; +@@ -626,15 +626,14 @@ int search_by_key(struct super_block *sb + + /* Read the next tree node, and set the last element in the path to + have a pointer to it. */ +- if ((p_s_bh = p_s_last_element->pe_buffer = ++ if ((bh = p_s_last_element->pe_buffer = + sb_getblk(sb, n_block_number))) { +- if (!buffer_uptodate(p_s_bh) && reada_count > 1) { ++ if (!buffer_uptodate(bh) && reada_count > 1) + search_by_key_reada(sb, reada_bh, + reada_blocks, reada_count); +- } +- ll_rw_block(READ, 1, &p_s_bh); +- wait_on_buffer(p_s_bh); +- if (!buffer_uptodate(p_s_bh)) ++ ll_rw_block(READ, 1, &bh); ++ wait_on_buffer(bh); ++ if (!buffer_uptodate(bh)) + goto io_error; + } else { + io_error: +@@ -651,8 +650,8 @@ int search_by_key(struct super_block *sb + to search is still in the tree rooted from the current buffer. If + not then repeat search from the root. 
*/ + if (fs_changed(fs_gen, sb) && +- (!B_IS_IN_TREE(p_s_bh) || +- B_LEVEL(p_s_bh) != expected_level || ++ (!B_IS_IN_TREE(bh) || ++ B_LEVEL(bh) != expected_level || + !key_in_buffer(p_s_search_path, p_s_key, sb))) { + PROC_INFO_INC(sb, search_by_key_fs_changed); + PROC_INFO_INC(sb, search_by_key_restarted); +@@ -686,25 +685,25 @@ int search_by_key(struct super_block *sb + + // make sure, that the node contents look like a node of + // certain level +- if (!is_tree_node(p_s_bh, expected_level)) { ++ if (!is_tree_node(bh, expected_level)) { + reiserfs_error(sb, "vs-5150", + "invalid format found in block %ld. " +- "Fsck?", p_s_bh->b_blocknr); ++ "Fsck?", bh->b_blocknr); + pathrelse(p_s_search_path); + return IO_ERROR; + } + + /* ok, we have acquired next formatted node in the tree */ +- n_node_level = B_LEVEL(p_s_bh); ++ n_node_level = B_LEVEL(bh); + +- PROC_INFO_BH_STAT(sb, p_s_bh, n_node_level - 1); ++ PROC_INFO_BH_STAT(sb, bh, n_node_level - 1); + + RFALSE(n_node_level < n_stop_level, + "vs-5152: tree level (%d) is less than stop level (%d)", + n_node_level, n_stop_level); + +- n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bh, 0), +- B_NR_ITEMS(p_s_bh), ++ n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(bh, 0), ++ B_NR_ITEMS(bh), + (n_node_level == + DISK_LEAF_NODE_LEVEL) ? IH_SIZE : + KEY_SIZE, +@@ -726,13 +725,13 @@ int search_by_key(struct super_block *sb + an internal node. Now we calculate child block number by + position in the node. 
*/ + n_block_number = +- B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position); ++ B_N_CHILD_NUM(bh, p_s_last_element->pe_position); + + /* if we are going to read leaf nodes, try for read ahead as well */ + if ((p_s_search_path->reada & PATH_READA) && + n_node_level == DISK_LEAF_NODE_LEVEL + 1) { + int pos = p_s_last_element->pe_position; +- int limit = B_NR_ITEMS(p_s_bh); ++ int limit = B_NR_ITEMS(bh); + struct reiserfs_key *le_key; + + if (p_s_search_path->reada & PATH_READA_BACK) +@@ -741,7 +740,7 @@ int search_by_key(struct super_block *sb + if (pos == limit) + break; + reada_blocks[reada_count++] = +- B_N_CHILD_NUM(p_s_bh, pos); ++ B_N_CHILD_NUM(bh, pos); + if (p_s_search_path->reada & PATH_READA_BACK) + pos--; + else +@@ -750,7 +749,7 @@ int search_by_key(struct super_block *sb + /* + * check to make sure we're in the same object + */ +- le_key = B_N_PDELIM_KEY(p_s_bh, pos); ++ le_key = B_N_PDELIM_KEY(bh, pos); + if (le32_to_cpu(le_key->k_objectid) != + p_s_key->on_disk_key.k_objectid) { + break; +@@ -851,15 +850,15 @@ int search_for_position_by_key(struct su + /* Compare given item and item pointed to by the path. */ + int comp_items(const struct item_head *stored_ih, const struct treepath *p_s_path) + { +- struct buffer_head *p_s_bh; ++ struct buffer_head *bh = PATH_PLAST_BUFFER(p_s_path); + struct item_head *ih; + + /* Last buffer at the path is not in the tree. */ +- if (!B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path))) ++ if (!B_IS_IN_TREE(bh)) + return 1; + + /* Last path position is invalid. 
*/ +- if (PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh)) ++ if (PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(bh)) + return 1; + + /* we need only to know, whether it is the same item */ +@@ -959,7 +958,7 @@ static char prepare_for_delete_or_cut(st + { + struct super_block *sb = inode->i_sb; + struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_path); +- struct buffer_head *p_s_bh = PATH_PLAST_BUFFER(p_s_path); ++ struct buffer_head *bh = PATH_PLAST_BUFFER(p_s_path); + + BUG_ON(!th->t_trans_id); + +@@ -1003,7 +1002,7 @@ static char prepare_for_delete_or_cut(st + do { + need_re_search = 0; + *p_n_cut_size = 0; +- p_s_bh = PATH_PLAST_BUFFER(p_s_path); ++ bh = PATH_PLAST_BUFFER(p_s_path); + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); + pos = I_UNFM_NUM(&s_ih); + +@@ -1019,13 +1018,13 @@ static char prepare_for_delete_or_cut(st + break; + } + +- unfm = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + pos - 1; ++ unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1; + block = get_block_num(unfm, 0); + + if (block != 0) { +- reiserfs_prepare_for_journal(sb, p_s_bh, 1); ++ reiserfs_prepare_for_journal(sb, bh, 1); + put_block_num(unfm, 0, 0); +- journal_mark_dirty (th, sb, p_s_bh); ++ journal_mark_dirty(th, sb, bh); + reiserfs_free_block(th, inode, block, 1); + } + +@@ -1049,7 +1048,7 @@ static char prepare_for_delete_or_cut(st + /* a trick. If the buffer has been logged, this will do nothing. If + ** we've broken the loop without logging it, it will restore the + ** buffer */ +- reiserfs_restore_prepared_buffer(sb, p_s_bh); ++ reiserfs_restore_prepared_buffer(sb, bh); + } while (need_re_search && + search_for_position_by_key(sb, p_s_item_key, p_s_path) == POSITION_FOUND); + pos_in_item(p_s_path) = pos * UNFM_P_SIZE; +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -751,25 +751,25 @@ struct block_head { + #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ + + /* Given the buffer head of a formatted node, resolve to the block head of that node. 
*/ +-#define B_BLK_HEAD(p_s_bh) ((struct block_head *)((p_s_bh)->b_data)) ++#define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data)) + /* Number of items that are in buffer. */ +-#define B_NR_ITEMS(p_s_bh) (blkh_nr_item(B_BLK_HEAD(p_s_bh))) +-#define B_LEVEL(p_s_bh) (blkh_level(B_BLK_HEAD(p_s_bh))) +-#define B_FREE_SPACE(p_s_bh) (blkh_free_space(B_BLK_HEAD(p_s_bh))) +- +-#define PUT_B_NR_ITEMS(p_s_bh,val) do { set_blkh_nr_item(B_BLK_HEAD(p_s_bh),val); } while (0) +-#define PUT_B_LEVEL(p_s_bh,val) do { set_blkh_level(B_BLK_HEAD(p_s_bh),val); } while (0) +-#define PUT_B_FREE_SPACE(p_s_bh,val) do { set_blkh_free_space(B_BLK_HEAD(p_s_bh),val); } while (0) ++#define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh))) ++#define B_LEVEL(bh) (blkh_level(B_BLK_HEAD(bh))) ++#define B_FREE_SPACE(bh) (blkh_free_space(B_BLK_HEAD(bh))) ++ ++#define PUT_B_NR_ITEMS(bh, val) do { set_blkh_nr_item(B_BLK_HEAD(bh), val); } while (0) ++#define PUT_B_LEVEL(bh, val) do { set_blkh_level(B_BLK_HEAD(bh), val); } while (0) ++#define PUT_B_FREE_SPACE(bh, val) do { set_blkh_free_space(B_BLK_HEAD(bh), val); } while (0) + + /* Get right delimiting key. -- little endian */ +-#define B_PRIGHT_DELIM_KEY(p_s_bh) (&(blk_right_delim_key(B_BLK_HEAD(p_s_bh)))) ++#define B_PRIGHT_DELIM_KEY(bh) (&(blk_right_delim_key(B_BLK_HEAD(bh)))) + + /* Does the buffer contain a disk leaf. 
*/ +-#define B_IS_ITEMS_LEVEL(p_s_bh) (B_LEVEL(p_s_bh) == DISK_LEAF_NODE_LEVEL) ++#define B_IS_ITEMS_LEVEL(bh) (B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL) + + /* Does the buffer contain a disk internal node */ +-#define B_IS_KEYS_LEVEL(p_s_bh) (B_LEVEL(p_s_bh) > DISK_LEAF_NODE_LEVEL \ +- && B_LEVEL(p_s_bh) <= MAX_HEIGHT) ++#define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \ ++ && B_LEVEL(bh) <= MAX_HEIGHT) + + /***************************************************************************/ + /* STAT DATA */ +@@ -1119,12 +1119,13 @@ struct disk_child { + #define put_dc_size(dc_p, val) do { (dc_p)->dc_size = cpu_to_le16(val); } while(0) + + /* Get disk child by buffer header and position in the tree node. */ +-#define B_N_CHILD(p_s_bh,n_pos) ((struct disk_child *)\ +-((p_s_bh)->b_data+BLKH_SIZE+B_NR_ITEMS(p_s_bh)*KEY_SIZE+DC_SIZE*(n_pos))) ++#define B_N_CHILD(bh, n_pos) ((struct disk_child *)\ ++((bh)->b_data + BLKH_SIZE + B_NR_ITEMS(bh) * KEY_SIZE + DC_SIZE * (n_pos))) + + /* Get disk child number by buffer header and position in the tree node. 
*/ +-#define B_N_CHILD_NUM(p_s_bh,n_pos) (dc_block_number(B_N_CHILD(p_s_bh,n_pos))) +-#define PUT_B_N_CHILD_NUM(p_s_bh,n_pos, val) (put_dc_block_number(B_N_CHILD(p_s_bh,n_pos), val )) ++#define B_N_CHILD_NUM(bh, n_pos) (dc_block_number(B_N_CHILD(bh, n_pos))) ++#define PUT_B_N_CHILD_NUM(bh, n_pos, val) \ ++ (put_dc_block_number(B_N_CHILD(bh, n_pos), val)) + + /* maximal value of field child_size in structure disk_child */ + /* child size is the combined size of all items and their headers */ +@@ -1837,7 +1838,7 @@ int search_by_key(struct super_block *, + int search_for_position_by_key(struct super_block *sb, + const struct cpu_key *p_s_cpu_key, + struct treepath *p_s_search_path); +-extern void decrement_bcount(struct buffer_head *p_s_bh); ++extern void decrement_bcount(struct buffer_head *bh); + void decrement_counters_in_path(struct treepath *p_s_search_path); + void pathrelse(struct treepath *p_s_search_path); + int reiserfs_check_path(struct treepath *p); +@@ -1978,7 +1979,7 @@ int reiserfs_global_version_in_proc(char + #define PROC_INFO_MAX( sb, field, value ) VOID_V + #define PROC_INFO_INC( sb, field ) VOID_V + #define PROC_INFO_ADD( sb, field, val ) VOID_V +-#define PROC_INFO_BH_STAT(sb, p_s_bh, n_node_level) VOID_V ++#define PROC_INFO_BH_STAT(sb, bh, n_node_level) VOID_V + #endif + + /* dir.c */ diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_inode.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_inode.diff new file mode 100644 index 0000000000..437637d9e0 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_inode.diff @@ -0,0 +1,554 @@ +From: Jeff Mahoney +Subject: reiserfs: rename p_s_inode to inode + + This patch is a simple s/p_s_inode/inode/g to the reiserfs code. This is the + third in a series of patches to rip out some of the awful variable naming in + reiserfs. 
+ +Signed-off-by: Jeff Mahoney + +--- + + fs/reiserfs/file.c | 16 +++--- + fs/reiserfs/inode.c | 43 ++++++++--------- + fs/reiserfs/stree.c | 103 +++++++++++++++++++++--------------------- + fs/reiserfs/tail_conversion.c | 18 ++++--- + include/linux/reiserfs_fs.h | 4 - + 5 files changed, 95 insertions(+), 89 deletions(-) + +--- a/fs/reiserfs/file.c ++++ b/fs/reiserfs/file.c +@@ -137,17 +137,17 @@ static void reiserfs_vfs_truncate_file(s + static int reiserfs_sync_file(struct file *p_s_filp, + struct dentry *p_s_dentry, int datasync) + { +- struct inode *p_s_inode = p_s_dentry->d_inode; ++ struct inode *inode = p_s_dentry->d_inode; + int n_err; + int barrier_done; + +- BUG_ON(!S_ISREG(p_s_inode->i_mode)); +- n_err = sync_mapping_buffers(p_s_inode->i_mapping); +- reiserfs_write_lock(p_s_inode->i_sb); +- barrier_done = reiserfs_commit_for_inode(p_s_inode); +- reiserfs_write_unlock(p_s_inode->i_sb); +- if (barrier_done != 1 && reiserfs_barrier_flush(p_s_inode->i_sb)) +- blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL); ++ BUG_ON(!S_ISREG(inode->i_mode)); ++ n_err = sync_mapping_buffers(inode->i_mapping); ++ reiserfs_write_lock(inode->i_sb); ++ barrier_done = reiserfs_commit_for_inode(inode); ++ reiserfs_write_unlock(inode->i_sb); ++ if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) ++ blkdev_issue_flush(inode->i_sb->s_bdev, NULL); + if (barrier_done < 0) + return barrier_done; + return (n_err < 0) ? -EIO : 0; +--- a/fs/reiserfs/inode.c ++++ b/fs/reiserfs/inode.c +@@ -1992,7 +1992,7 @@ int reiserfs_new_inode(struct reiserfs_t + ** + ** on failure, nonzero is returned, page_result and bh_result are untouched. 
+ */ +-static int grab_tail_page(struct inode *p_s_inode, ++static int grab_tail_page(struct inode *inode, + struct page **page_result, + struct buffer_head **bh_result) + { +@@ -2000,11 +2000,11 @@ static int grab_tail_page(struct inode * + /* we want the page with the last byte in the file, + ** not the page that will hold the next byte for appending + */ +- unsigned long index = (p_s_inode->i_size - 1) >> PAGE_CACHE_SHIFT; ++ unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; + unsigned long pos = 0; + unsigned long start = 0; +- unsigned long blocksize = p_s_inode->i_sb->s_blocksize; +- unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1); ++ unsigned long blocksize = inode->i_sb->s_blocksize; ++ unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1); + struct buffer_head *bh; + struct buffer_head *head; + struct page *page; +@@ -2018,7 +2018,7 @@ static int grab_tail_page(struct inode * + if ((offset & (blocksize - 1)) == 0) { + return -ENOENT; + } +- page = grab_cache_page(p_s_inode->i_mapping, index); ++ page = grab_cache_page(inode->i_mapping, index); + error = -ENOMEM; + if (!page) { + goto out; +@@ -2047,7 +2047,7 @@ static int grab_tail_page(struct inode * + ** I've screwed up the code to find the buffer, or the code to + ** call prepare_write + */ +- reiserfs_error(p_s_inode->i_sb, "clm-6000", ++ reiserfs_error(inode->i_sb, "clm-6000", + "error reading block %lu", bh->b_blocknr); + error = -EIO; + goto unlock; +@@ -2070,27 +2070,28 @@ static int grab_tail_page(struct inode * + ** + ** some code taken from block_truncate_page + */ +-int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) ++int reiserfs_truncate_file(struct inode *inode, int update_timestamps) + { + struct reiserfs_transaction_handle th; + /* we want the offset for the first byte after the end of the file */ +- unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1); +- unsigned blocksize = p_s_inode->i_sb->s_blocksize; ++ 
unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1); ++ unsigned blocksize = inode->i_sb->s_blocksize; + unsigned length; + struct page *page = NULL; + int error; + struct buffer_head *bh = NULL; + int err2; + +- reiserfs_write_lock(p_s_inode->i_sb); ++ reiserfs_write_lock(inode->i_sb); + +- if (p_s_inode->i_size > 0) { +- if ((error = grab_tail_page(p_s_inode, &page, &bh))) { ++ if (inode->i_size > 0) { ++ error = grab_tail_page(inode, &page, &bh); ++ if (error) { + // -ENOENT means we truncated past the end of the file, + // and get_block_create_0 could not find a block to read in, + // which is ok. + if (error != -ENOENT) +- reiserfs_error(p_s_inode->i_sb, "clm-6001", ++ reiserfs_error(inode->i_sb, "clm-6001", + "grab_tail_page failed %d", + error); + page = NULL; +@@ -2108,19 +2109,19 @@ int reiserfs_truncate_file(struct inode + /* it is enough to reserve space in transaction for 2 balancings: + one for "save" link adding and another for the first + cut_from_item. 1 is for update_sd */ +- error = journal_begin(&th, p_s_inode->i_sb, ++ error = journal_begin(&th, inode->i_sb, + JOURNAL_PER_BALANCE_CNT * 2 + 1); + if (error) + goto out; +- reiserfs_update_inode_transaction(p_s_inode); ++ reiserfs_update_inode_transaction(inode); + if (update_timestamps) + /* we are doing real truncate: if the system crashes before the last + transaction of truncating gets committed - on reboot the file + either appears truncated properly or not truncated at all */ +- add_save_link(&th, p_s_inode, 1); +- err2 = reiserfs_do_truncate(&th, p_s_inode, page, update_timestamps); ++ add_save_link(&th, inode, 1); ++ err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps); + error = +- journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1); ++ journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1); + if (error) + goto out; + +@@ -2131,7 +2132,7 @@ int reiserfs_truncate_file(struct inode + } + + if (update_timestamps) { +- error = 
remove_save_link(p_s_inode, 1 /* truncate */ ); ++ error = remove_save_link(inode, 1 /* truncate */); + if (error) + goto out; + } +@@ -2150,14 +2151,14 @@ int reiserfs_truncate_file(struct inode + page_cache_release(page); + } + +- reiserfs_write_unlock(p_s_inode->i_sb); ++ reiserfs_write_unlock(inode->i_sb); + return 0; + out: + if (page) { + unlock_page(page); + page_cache_release(page); + } +- reiserfs_write_unlock(p_s_inode->i_sb); ++ reiserfs_write_unlock(inode->i_sb); + return error; + } + +--- a/fs/reiserfs/stree.c ++++ b/fs/reiserfs/stree.c +@@ -1143,10 +1143,11 @@ char head2type(struct item_head *ih) + /* Delete object item. */ + int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_path, /* Path to the deleted item. */ + const struct cpu_key *p_s_item_key, /* Key to search for the deleted item. */ +- struct inode *p_s_inode, /* inode is here just to update i_blocks and quotas */ ++ struct inode *inode, /* inode is here just to update ++ * i_blocks and quotas */ + struct buffer_head *p_s_un_bh) + { /* NULL or unformatted node pointer. 
*/ +- struct super_block *sb = p_s_inode->i_sb; ++ struct super_block *sb = inode->i_sb; + struct tree_balance s_del_balance; + struct item_head s_ih; + struct item_head *q_ih; +@@ -1170,10 +1171,10 @@ int reiserfs_delete_item(struct reiserfs + n_iter++; + c_mode = + #endif +- prepare_for_delete_or_cut(th, p_s_inode, p_s_path, ++ prepare_for_delete_or_cut(th, inode, p_s_path, + p_s_item_key, &n_removed, + &n_del_size, +- max_reiserfs_offset(p_s_inode)); ++ max_reiserfs_offset(inode)); + + RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); + +@@ -1214,7 +1215,7 @@ int reiserfs_delete_item(struct reiserfs + ** split into multiple items, and we only want to decrement for + ** the unfm node once + */ +- if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(q_ih)) { ++ if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) { + if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) { + quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; + } else { +@@ -1259,9 +1260,9 @@ int reiserfs_delete_item(struct reiserfs + #ifdef REISERQUOTA_DEBUG + reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "reiserquota delete_item(): freeing %u, id=%u type=%c", +- quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); ++ quota_cut_bytes, inode->i_uid, head2type(&s_ih)); + #endif +- DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); ++ DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes); + + /* Return deleted body length */ + return n_ret_value; +@@ -1423,25 +1424,25 @@ static void unmap_buffers(struct page *p + } + + static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, +- struct inode *p_s_inode, ++ struct inode *inode, + struct page *page, + struct treepath *p_s_path, + const struct cpu_key *p_s_item_key, + loff_t n_new_file_size, char *p_c_mode) + { +- struct super_block *sb = p_s_inode->i_sb; ++ struct super_block *sb = inode->i_sb; + int n_block_size = sb->s_blocksize; + int cut_bytes; + BUG_ON(!th->t_trans_id); +- BUG_ON(n_new_file_size != p_s_inode->i_size); ++ 
BUG_ON(n_new_file_size != inode->i_size); + + /* the page being sent in could be NULL if there was an i/o error + ** reading in the last block. The user will hit problems trying to + ** read the file, but for now we just skip the indirect2direct + */ +- if (atomic_read(&p_s_inode->i_count) > 1 || +- !tail_has_to_be_packed(p_s_inode) || +- !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) { ++ if (atomic_read(&inode->i_count) > 1 || ++ !tail_has_to_be_packed(inode) || ++ !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) { + /* leave tail in an unformatted node */ + *p_c_mode = M_SKIP_BALANCING; + cut_bytes = +@@ -1450,8 +1451,9 @@ static int maybe_indirect_to_direct(stru + return cut_bytes; + } + /* Permorm the conversion to a direct_item. */ +- /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); */ +- return indirect2direct(th, p_s_inode, page, p_s_path, p_s_item_key, ++ /* return indirect_to_direct(inode, p_s_path, p_s_item_key, ++ n_new_file_size, p_c_mode); */ ++ return indirect2direct(th, inode, page, p_s_path, p_s_item_key, + n_new_file_size, p_c_mode); + } + +@@ -1505,10 +1507,10 @@ static void indirect_to_direct_roll_back + int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, + struct treepath *p_s_path, + struct cpu_key *p_s_item_key, +- struct inode *p_s_inode, ++ struct inode *inode, + struct page *page, loff_t n_new_file_size) + { +- struct super_block *sb = p_s_inode->i_sb; ++ struct super_block *sb = inode->i_sb; + /* Every function which is going to call do_balance must first + create a tree_balance structure. Then it must fill up this + structure by using the init_tb_struct and fix_nodes functions. 
+@@ -1525,7 +1527,7 @@ int reiserfs_cut_from_item(struct reiser + + BUG_ON(!th->t_trans_id); + +- init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, ++ init_tb_struct(th, &s_cut_balance, inode->i_sb, p_s_path, + n_cut_size); + + /* Repeat this loop until we either cut the item without needing +@@ -1537,7 +1539,7 @@ int reiserfs_cut_from_item(struct reiser + pointers. */ + + c_mode = +- prepare_for_delete_or_cut(th, p_s_inode, p_s_path, ++ prepare_for_delete_or_cut(th, inode, p_s_path, + p_s_item_key, &n_removed, + &n_cut_size, n_new_file_size); + if (c_mode == M_CONVERT) { +@@ -1547,7 +1549,7 @@ int reiserfs_cut_from_item(struct reiser + "PAP-5570: can not convert twice"); + + n_ret_value = +- maybe_indirect_to_direct(th, p_s_inode, page, ++ maybe_indirect_to_direct(th, inode, page, + p_s_path, p_s_item_key, + n_new_file_size, &c_mode); + if (c_mode == M_SKIP_BALANCING) +@@ -1612,7 +1614,7 @@ int reiserfs_cut_from_item(struct reiser + if (n_is_inode_locked) { + // FIXME: this seems to be not needed: we are always able + // to cut item +- indirect_to_direct_roll_back(th, p_s_inode, p_s_path); ++ indirect_to_direct_roll_back(th, inode, p_s_path); + } + if (n_ret_value == NO_DISK_SPACE) + reiserfs_warning(sb, "reiserfs-5092", +@@ -1639,12 +1641,12 @@ int reiserfs_cut_from_item(struct reiser + ** item. 
+ */ + p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path); +- if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) { ++ if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) { + if (c_mode == M_DELETE && + (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == + 1) { + // FIXME: this is to keep 3.5 happy +- REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX; ++ REISERFS_I(inode)->i_first_direct_byte = U32_MAX; + quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; + } else { + quota_cut_bytes = 0; +@@ -1687,14 +1689,14 @@ int reiserfs_cut_from_item(struct reiser + ** unmap and invalidate it + */ + unmap_buffers(page, tail_pos); +- REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask; ++ REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; + } + #ifdef REISERQUOTA_DEBUG +- reiserfs_debug(p_s_inode->i_sb, REISERFS_DEBUG_CODE, ++ reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, + "reiserquota cut_from_item(): freeing %u id=%u type=%c", +- quota_cut_bytes, p_s_inode->i_uid, '?'); ++ quota_cut_bytes, inode->i_uid, '?'); + #endif +- DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); ++ DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes); + return n_ret_value; + } + +@@ -1715,8 +1717,8 @@ static void truncate_directory(struct re + + /* Truncate file to the new size. Note, this must be called with a transaction + already started */ +-int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, /* ->i_size contains new +- size */ ++int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, ++ struct inode *inode, /* ->i_size contains new size */ + struct page *page, /* up to date for last block */ + int update_timestamps /* when it is called by + file_release to convert +@@ -1735,35 +1737,35 @@ int reiserfs_do_truncate(struct reiserfs + + BUG_ON(!th->t_trans_id); + if (! 
+- (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) +- || S_ISLNK(p_s_inode->i_mode))) ++ (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ++ || S_ISLNK(inode->i_mode))) + return 0; + +- if (S_ISDIR(p_s_inode->i_mode)) { ++ if (S_ISDIR(inode->i_mode)) { + // deletion of directory - no need to update timestamps +- truncate_directory(th, p_s_inode); ++ truncate_directory(th, inode); + return 0; + } + + /* Get new file size. */ +- n_new_file_size = p_s_inode->i_size; ++ n_new_file_size = inode->i_size; + + // FIXME: note, that key type is unimportant here +- make_cpu_key(&s_item_key, p_s_inode, max_reiserfs_offset(p_s_inode), ++ make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode), + TYPE_DIRECT, 3); + + retval = +- search_for_position_by_key(p_s_inode->i_sb, &s_item_key, ++ search_for_position_by_key(inode->i_sb, &s_item_key, + &s_search_path); + if (retval == IO_ERROR) { +- reiserfs_error(p_s_inode->i_sb, "vs-5657", ++ reiserfs_error(inode->i_sb, "vs-5657", + "i/o failure occurred trying to truncate %K", + &s_item_key); + err = -EIO; + goto out; + } + if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { +- reiserfs_error(p_s_inode->i_sb, "PAP-5660", ++ reiserfs_error(inode->i_sb, "PAP-5660", + "wrong result %d of search for %K", retval, + &s_item_key); + +@@ -1780,7 +1782,7 @@ int reiserfs_do_truncate(struct reiserfs + else { + loff_t offset = le_ih_k_offset(p_le_ih); + int bytes = +- op_bytes_number(p_le_ih, p_s_inode->i_sb->s_blocksize); ++ op_bytes_number(p_le_ih, inode->i_sb->s_blocksize); + + /* this may mismatch with real file size: if last direct item + had no padding zeros and last unformatted node had no free +@@ -1805,9 +1807,9 @@ int reiserfs_do_truncate(struct reiserfs + /* Cut or delete file item. 
*/ + n_deleted = + reiserfs_cut_from_item(th, &s_search_path, &s_item_key, +- p_s_inode, page, n_new_file_size); ++ inode, page, n_new_file_size); + if (n_deleted < 0) { +- reiserfs_warning(p_s_inode->i_sb, "vs-5665", ++ reiserfs_warning(inode->i_sb, "vs-5665", + "reiserfs_cut_from_item failed"); + reiserfs_check_path(&s_search_path); + return 0; +@@ -1837,22 +1839,22 @@ int reiserfs_do_truncate(struct reiserfs + pathrelse(&s_search_path); + + if (update_timestamps) { +- p_s_inode->i_mtime = p_s_inode->i_ctime = +- CURRENT_TIME_SEC; ++ inode->i_mtime = CURRENT_TIME_SEC; ++ inode->i_ctime = CURRENT_TIME_SEC; + } +- reiserfs_update_sd(th, p_s_inode); ++ reiserfs_update_sd(th, inode); + +- err = journal_end(th, p_s_inode->i_sb, orig_len_alloc); ++ err = journal_end(th, inode->i_sb, orig_len_alloc); + if (err) + goto out; +- err = journal_begin(th, p_s_inode->i_sb, ++ err = journal_begin(th, inode->i_sb, + JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ; + if (err) + goto out; +- reiserfs_update_inode_transaction(p_s_inode); ++ reiserfs_update_inode_transaction(inode); + } + } while (n_file_size > ROUND_UP(n_new_file_size) && +- search_for_position_by_key(p_s_inode->i_sb, &s_item_key, ++ search_for_position_by_key(inode->i_sb, &s_item_key, + &s_search_path) == POSITION_FOUND); + + RFALSE(n_file_size > ROUND_UP(n_new_file_size), +@@ -1862,9 +1864,10 @@ int reiserfs_do_truncate(struct reiserfs + update_and_out: + if (update_timestamps) { + // this is truncate, not file closing +- p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC; ++ inode->i_mtime = CURRENT_TIME_SEC; ++ inode->i_ctime = CURRENT_TIME_SEC; + } +- reiserfs_update_sd(th, p_s_inode); ++ reiserfs_update_sd(th, inode); + + out: + pathrelse(&s_search_path); +--- a/fs/reiserfs/tail_conversion.c ++++ b/fs/reiserfs/tail_conversion.c +@@ -170,12 +170,14 @@ void reiserfs_unmap_buffer(struct buffer + what we expect from it (number of cut bytes). 
But when tail remains + in the unformatted node, we set mode to SKIP_BALANCING and unlock + inode */ +-int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, struct page *page, struct treepath *p_s_path, /* path to the indirect item. */ ++int indirect2direct(struct reiserfs_transaction_handle *th, ++ struct inode *inode, struct page *page, ++ struct treepath *p_s_path, /* path to the indirect item. */ + const struct cpu_key *p_s_item_key, /* Key to look for unformatted node pointer to be cut. */ + loff_t n_new_file_size, /* New file size. */ + char *p_c_mode) + { +- struct super_block *sb = p_s_inode->i_sb; ++ struct super_block *sb = inode->i_sb; + struct item_head s_ih; + unsigned long n_block_size = sb->s_blocksize; + char *tail; +@@ -193,7 +195,7 @@ int indirect2direct(struct reiserfs_tran + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); + + tail_len = (n_new_file_size & (n_block_size - 1)); +- if (get_inode_sd_version(p_s_inode) == STAT_DATA_V2) ++ if (get_inode_sd_version(inode) == STAT_DATA_V2) + round_tail_len = ROUND_UP(tail_len); + else + round_tail_len = tail_len; +@@ -228,7 +230,7 @@ int indirect2direct(struct reiserfs_tran + } + + /* Set direct item header to insert. */ +- make_le_item_head(&s_ih, NULL, get_inode_item_key_version(p_s_inode), ++ make_le_item_head(&s_ih, NULL, get_inode_item_key_version(inode), + pos1 + 1, TYPE_DIRECT, round_tail_len, + 0xffff /*ih_free_space */ ); + +@@ -244,7 +246,7 @@ int indirect2direct(struct reiserfs_tran + set_cpu_key_k_type(&key, TYPE_DIRECT); + key.key_length = 4; + /* Insert tail as new direct item in the tree */ +- if (reiserfs_insert_item(th, p_s_path, &key, &s_ih, p_s_inode, ++ if (reiserfs_insert_item(th, p_s_path, &key, &s_ih, inode, + tail ? tail : NULL) < 0) { + /* No disk memory. So we can not convert last unformatted node + to the direct item. 
In this case we used to adjust +@@ -258,7 +260,7 @@ int indirect2direct(struct reiserfs_tran + kunmap(page); + + /* make sure to get the i_blocks changes from reiserfs_insert_item */ +- reiserfs_update_sd(th, p_s_inode); ++ reiserfs_update_sd(th, inode); + + // note: we have now the same as in above direct2indirect + // conversion: there are two keys which have matching first three +@@ -269,8 +271,8 @@ int indirect2direct(struct reiserfs_tran + *p_c_mode = M_CUT; + + /* we store position of first direct item in the in-core inode */ +- //mark_file_with_tail (p_s_inode, pos1 + 1); +- REISERFS_I(p_s_inode)->i_first_direct_byte = pos1 + 1; ++ /* mark_file_with_tail (inode, pos1 + 1); */ ++ REISERFS_I(inode)->i_first_direct_byte = pos1 + 1; + + return n_block_size - round_tail_len; + } +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -1870,9 +1870,9 @@ int reiserfs_delete_item(struct reiserfs + void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, + struct inode *inode, struct reiserfs_key *key); + int reiserfs_delete_object(struct reiserfs_transaction_handle *th, +- struct inode *p_s_inode); ++ struct inode *inode); + int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, +- struct inode *p_s_inode, struct page *, ++ struct inode *inode, struct page *, + int update_timestamps); + + #define i_block_size(inode) ((inode)->i_sb->s_blocksize) diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_sb.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_sb.diff new file mode 100644 index 0000000000..48ac35ff2a --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_sb.diff @@ -0,0 +1,2869 @@ +From: Jeff Mahoney +Subject: reiserfs: rename p_s_sb to sb + + This patch is a simple s/p_s_sb/sb/g to the reiserfs code. This is the first + in a series of patches to rip out some of the awful variable naming in + reiserfs. 
+ +Signed-off-by: Jeff Mahoney + +--- + + fs/reiserfs/fix_node.c | 46 +- + fs/reiserfs/fix_node.c | 46 +- + fs/reiserfs/journal.c | 735 ++++++++++++++++++++---------------------- + fs/reiserfs/stree.c | 126 +++---- + fs/reiserfs/tail_conversion.c | 16 + include/linux/reiserfs_fs.h | 14 + 5 files changed, 468 insertions(+), 469 deletions(-) + +--- a/fs/reiserfs/fix_node.c ++++ b/fs/reiserfs/fix_node.c +@@ -785,7 +785,7 @@ static int get_empty_nodes(struct tree_b + b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; + int n_counter, n_number_of_freeblk, n_amount_needed, /* number of needed empty blocks */ + n_retval = CARRY_ON; +- struct super_block *p_s_sb = p_s_tb->tb_sb; ++ struct super_block *sb = p_s_tb->tb_sb; + + /* number_of_freeblk is the number of empty blocks which have been + acquired for use by the balancing algorithm minus the number of +@@ -830,7 +830,7 @@ static int get_empty_nodes(struct tree_b + RFALSE(!*p_n_blocknr, + "PAP-8135: reiserfs_new_blocknrs failed when got new blocks"); + +- p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr); ++ p_s_new_bh = sb_getblk(sb, *p_n_blocknr); + RFALSE(buffer_dirty(p_s_new_bh) || + buffer_journaled(p_s_new_bh) || + buffer_journal_dirty(p_s_new_bh), +@@ -899,7 +899,7 @@ static int get_rfree(struct tree_balance + static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h) + { + struct buffer_head *p_s_father, *left; +- struct super_block *p_s_sb = p_s_tb->tb_sb; ++ struct super_block *sb = p_s_tb->tb_sb; + b_blocknr_t n_left_neighbor_blocknr; + int n_left_neighbor_position; + +@@ -924,7 +924,7 @@ static int is_left_neighbor_in_cache(str + n_left_neighbor_blocknr = + B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position); + /* Look for the left neighbor in the cache. 
*/ +- if ((left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr))) { ++ if ((left = sb_find_get_block(sb, n_left_neighbor_blocknr))) { + + RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left), + "vs-8170: left neighbor (%b %z) is not in the tree", +@@ -1942,14 +1942,14 @@ static int get_neighbors(struct tree_bal + int n_child_position, + n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1); + unsigned long n_son_number; +- struct super_block *p_s_sb = p_s_tb->tb_sb; ++ struct super_block *sb = p_s_tb->tb_sb; + struct buffer_head *p_s_bh; + +- PROC_INFO_INC(p_s_sb, get_neighbors[n_h]); ++ PROC_INFO_INC(sb, get_neighbors[n_h]); + + if (p_s_tb->lnum[n_h]) { + /* We need left neighbor to balance S[n_h]. */ +- PROC_INFO_INC(p_s_sb, need_l_neighbor[n_h]); ++ PROC_INFO_INC(sb, need_l_neighbor[n_h]); + p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); + + RFALSE(p_s_bh == p_s_tb->FL[n_h] && +@@ -1961,12 +1961,12 @@ static int get_neighbors(struct tree_bal + p_s_tb->FL[n_h]) ? p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb-> + FL[n_h]); + n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position); +- p_s_bh = sb_bread(p_s_sb, n_son_number); ++ p_s_bh = sb_bread(sb, n_son_number); + if (!p_s_bh) + return IO_ERROR; + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { + brelse(p_s_bh); +- PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]); ++ PROC_INFO_INC(sb, get_neighbors_restart[n_h]); + return REPEAT_SEARCH; + } + +@@ -1986,7 +1986,7 @@ static int get_neighbors(struct tree_bal + } + + if (p_s_tb->rnum[n_h]) { /* We need right neighbor to balance S[n_path_offset]. */ +- PROC_INFO_INC(p_s_sb, need_r_neighbor[n_h]); ++ PROC_INFO_INC(sb, need_r_neighbor[n_h]); + p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); + + RFALSE(p_s_bh == p_s_tb->FR[n_h] && +@@ -1998,12 +1998,12 @@ static int get_neighbors(struct tree_bal + n_child_position = + (p_s_bh == p_s_tb->FR[n_h]) ? 
p_s_tb->rkey[n_h] + 1 : 0; + n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position); +- p_s_bh = sb_bread(p_s_sb, n_son_number); ++ p_s_bh = sb_bread(sb, n_son_number); + if (!p_s_bh) + return IO_ERROR; + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { + brelse(p_s_bh); +- PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]); ++ PROC_INFO_INC(sb, get_neighbors_restart[n_h]); + return REPEAT_SEARCH; + } + brelse(p_s_tb->R[n_h]); +@@ -2089,51 +2089,51 @@ static int get_mem_for_virtual_node(stru + } + + #ifdef CONFIG_REISERFS_CHECK +-static void tb_buffer_sanity_check(struct super_block *p_s_sb, ++static void tb_buffer_sanity_check(struct super_block *sb, + struct buffer_head *p_s_bh, + const char *descr, int level) + { + if (p_s_bh) { + if (atomic_read(&(p_s_bh->b_count)) <= 0) { + +- reiserfs_panic(p_s_sb, "jmacd-1", "negative or zero " ++ reiserfs_panic(sb, "jmacd-1", "negative or zero " + "reference counter for buffer %s[%d] " + "(%b)", descr, level, p_s_bh); + } + + if (!buffer_uptodate(p_s_bh)) { +- reiserfs_panic(p_s_sb, "jmacd-2", "buffer is not up " ++ reiserfs_panic(sb, "jmacd-2", "buffer is not up " + "to date %s[%d] (%b)", + descr, level, p_s_bh); + } + + if (!B_IS_IN_TREE(p_s_bh)) { +- reiserfs_panic(p_s_sb, "jmacd-3", "buffer is not " ++ reiserfs_panic(sb, "jmacd-3", "buffer is not " + "in tree %s[%d] (%b)", + descr, level, p_s_bh); + } + +- if (p_s_bh->b_bdev != p_s_sb->s_bdev) { +- reiserfs_panic(p_s_sb, "jmacd-4", "buffer has wrong " ++ if (p_s_bh->b_bdev != sb->s_bdev) { ++ reiserfs_panic(sb, "jmacd-4", "buffer has wrong " + "device %s[%d] (%b)", + descr, level, p_s_bh); + } + +- if (p_s_bh->b_size != p_s_sb->s_blocksize) { +- reiserfs_panic(p_s_sb, "jmacd-5", "buffer has wrong " ++ if (p_s_bh->b_size != sb->s_blocksize) { ++ reiserfs_panic(sb, "jmacd-5", "buffer has wrong " + "blocksize %s[%d] (%b)", + descr, level, p_s_bh); + } + +- if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { +- reiserfs_panic(p_s_sb, "jmacd-6", "buffer block " ++ if 
(p_s_bh->b_blocknr > SB_BLOCK_COUNT(sb)) { ++ reiserfs_panic(sb, "jmacd-6", "buffer block " + "number too high %s[%d] (%b)", + descr, level, p_s_bh); + } + } + } + #else +-static void tb_buffer_sanity_check(struct super_block *p_s_sb, ++static void tb_buffer_sanity_check(struct super_block *sb, + struct buffer_head *p_s_bh, + const char *descr, int level) + {; +--- a/fs/reiserfs/journal.c ++++ b/fs/reiserfs/journal.c +@@ -97,7 +97,7 @@ static int flush_commit_list(struct supe + struct reiserfs_journal_list *jl, int flushall); + static int can_dirty(struct reiserfs_journal_cnode *cn); + static int journal_join(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, unsigned long nblocks); ++ struct super_block *sb, unsigned long nblocks); + static int release_journal_dev(struct super_block *super, + struct reiserfs_journal *journal); + static int dirty_one_transaction(struct super_block *s, +@@ -113,12 +113,12 @@ enum { + }; + + static int do_journal_begin_r(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, ++ struct super_block *sb, + unsigned long nblocks, int join); + +-static void init_journal_hash(struct super_block *p_s_sb) ++static void init_journal_hash(struct super_block *sb) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + memset(journal->j_hash_table, 0, + JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); + } +@@ -145,7 +145,7 @@ static void disable_barrier(struct super + } + + static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block +- *p_s_sb) ++ *sb) + { + struct reiserfs_bitmap_node *bn; + static int id; +@@ -154,7 +154,7 @@ static struct reiserfs_bitmap_node *allo + if (!bn) { + return NULL; + } +- bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS); ++ bn->data = kzalloc(sb->s_blocksize, GFP_NOFS); + if (!bn->data) { + kfree(bn); + return NULL; +@@ -164,9 +164,9 @@ static struct reiserfs_bitmap_node *allo + return bn; 
+ } + +-static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb) ++static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + struct reiserfs_bitmap_node *bn = NULL; + struct list_head *entry = journal->j_bitmap_nodes.next; + +@@ -176,21 +176,21 @@ static struct reiserfs_bitmap_node *get_ + if (entry != &journal->j_bitmap_nodes) { + bn = list_entry(entry, struct reiserfs_bitmap_node, list); + list_del(entry); +- memset(bn->data, 0, p_s_sb->s_blocksize); ++ memset(bn->data, 0, sb->s_blocksize); + journal->j_free_bitmap_nodes--; + return bn; + } +- bn = allocate_bitmap_node(p_s_sb); ++ bn = allocate_bitmap_node(sb); + if (!bn) { + yield(); + goto repeat; + } + return bn; + } +-static inline void free_bitmap_node(struct super_block *p_s_sb, ++static inline void free_bitmap_node(struct super_block *sb, + struct reiserfs_bitmap_node *bn) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + journal->j_used_bitmap_nodes--; + if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { + kfree(bn->data); +@@ -201,13 +201,13 @@ static inline void free_bitmap_node(stru + } + } + +-static void allocate_bitmap_nodes(struct super_block *p_s_sb) ++static void allocate_bitmap_nodes(struct super_block *sb) + { + int i; +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + struct reiserfs_bitmap_node *bn = NULL; + for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) { +- bn = allocate_bitmap_node(p_s_sb); ++ bn = allocate_bitmap_node(sb); + if (bn) { + list_add(&bn->list, &journal->j_bitmap_nodes); + journal->j_free_bitmap_nodes++; +@@ -217,30 +217,30 @@ static void allocate_bitmap_nodes(struct + } + } + +-static int set_bit_in_list_bitmap(struct super_block *p_s_sb, ++static int 
set_bit_in_list_bitmap(struct super_block *sb, + b_blocknr_t block, + struct reiserfs_list_bitmap *jb) + { +- unsigned int bmap_nr = block / (p_s_sb->s_blocksize << 3); +- unsigned int bit_nr = block % (p_s_sb->s_blocksize << 3); ++ unsigned int bmap_nr = block / (sb->s_blocksize << 3); ++ unsigned int bit_nr = block % (sb->s_blocksize << 3); + + if (!jb->bitmaps[bmap_nr]) { +- jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb); ++ jb->bitmaps[bmap_nr] = get_bitmap_node(sb); + } + set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data); + return 0; + } + +-static void cleanup_bitmap_list(struct super_block *p_s_sb, ++static void cleanup_bitmap_list(struct super_block *sb, + struct reiserfs_list_bitmap *jb) + { + int i; + if (jb->bitmaps == NULL) + return; + +- for (i = 0; i < reiserfs_bmap_count(p_s_sb); i++) { ++ for (i = 0; i < reiserfs_bmap_count(sb); i++) { + if (jb->bitmaps[i]) { +- free_bitmap_node(p_s_sb, jb->bitmaps[i]); ++ free_bitmap_node(sb, jb->bitmaps[i]); + jb->bitmaps[i] = NULL; + } + } +@@ -249,7 +249,7 @@ static void cleanup_bitmap_list(struct s + /* + ** only call this on FS unmount. 
+ */ +-static int free_list_bitmaps(struct super_block *p_s_sb, ++static int free_list_bitmaps(struct super_block *sb, + struct reiserfs_list_bitmap *jb_array) + { + int i; +@@ -257,16 +257,16 @@ static int free_list_bitmaps(struct supe + for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { + jb = jb_array + i; + jb->journal_list = NULL; +- cleanup_bitmap_list(p_s_sb, jb); ++ cleanup_bitmap_list(sb, jb); + vfree(jb->bitmaps); + jb->bitmaps = NULL; + } + return 0; + } + +-static int free_bitmap_nodes(struct super_block *p_s_sb) ++static int free_bitmap_nodes(struct super_block *sb) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + struct list_head *next = journal->j_bitmap_nodes.next; + struct reiserfs_bitmap_node *bn; + +@@ -286,7 +286,7 @@ static int free_bitmap_nodes(struct supe + ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. + ** jb_array is the array to be filled in. + */ +-int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, ++int reiserfs_allocate_list_bitmaps(struct super_block *sb, + struct reiserfs_list_bitmap *jb_array, + unsigned int bmap_nr) + { +@@ -300,7 +300,7 @@ int reiserfs_allocate_list_bitmaps(struc + jb->journal_list = NULL; + jb->bitmaps = vmalloc(mem); + if (!jb->bitmaps) { +- reiserfs_warning(p_s_sb, "clm-2000", "unable to " ++ reiserfs_warning(sb, "clm-2000", "unable to " + "allocate bitmaps for journal lists"); + failed = 1; + break; +@@ -308,7 +308,7 @@ int reiserfs_allocate_list_bitmaps(struc + memset(jb->bitmaps, 0, mem); + } + if (failed) { +- free_list_bitmaps(p_s_sb, jb_array); ++ free_list_bitmaps(sb, jb_array); + return -1; + } + return 0; +@@ -318,12 +318,12 @@ int reiserfs_allocate_list_bitmaps(struc + ** find an available list bitmap. 
If you can't find one, flush a commit list + ** and try again + */ +-static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb, ++static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, + struct reiserfs_journal_list + *jl) + { + int i, j; +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + struct reiserfs_list_bitmap *jb = NULL; + + for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) { +@@ -331,7 +331,7 @@ static struct reiserfs_list_bitmap *get_ + journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS; + jb = journal->j_list_bitmap + i; + if (journal->j_list_bitmap[i].journal_list) { +- flush_commit_list(p_s_sb, ++ flush_commit_list(sb, + journal->j_list_bitmap[i]. + journal_list, 1); + if (!journal->j_list_bitmap[i].journal_list) { +@@ -378,12 +378,12 @@ static struct reiserfs_journal_cnode *al + /* + ** pulls a cnode off the free list, or returns NULL on failure + */ +-static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) ++static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) + { + struct reiserfs_journal_cnode *cn; +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + +- reiserfs_check_lock_depth(p_s_sb, "get_cnode"); ++ reiserfs_check_lock_depth(sb, "get_cnode"); + + if (journal->j_cnode_free <= 0) { + return NULL; +@@ -405,12 +405,12 @@ static struct reiserfs_journal_cnode *ge + /* + ** returns a cnode to the free list + */ +-static void free_cnode(struct super_block *p_s_sb, ++static void free_cnode(struct super_block *sb, + struct reiserfs_journal_cnode *cn) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + +- reiserfs_check_lock_depth(p_s_sb, "free_cnode"); ++ reiserfs_check_lock_depth(sb, "free_cnode"); + + journal->j_cnode_used--; + journal->j_cnode_free++; +@@ -481,11 +481,11 @@ 
static inline struct reiserfs_journal_cn + ** reject it on the next call to reiserfs_in_journal + ** + */ +-int reiserfs_in_journal(struct super_block *p_s_sb, ++int reiserfs_in_journal(struct super_block *sb, + unsigned int bmap_nr, int bit_nr, int search_all, + b_blocknr_t * next_zero_bit) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + struct reiserfs_journal_cnode *cn; + struct reiserfs_list_bitmap *jb; + int i; +@@ -493,14 +493,14 @@ int reiserfs_in_journal(struct super_blo + + *next_zero_bit = 0; /* always start this at zero. */ + +- PROC_INFO_INC(p_s_sb, journal.in_journal); ++ PROC_INFO_INC(sb, journal.in_journal); + /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. + ** if we crash before the transaction that freed it commits, this transaction won't + ** have committed either, and the block will never be written + */ + if (search_all) { + for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { +- PROC_INFO_INC(p_s_sb, journal.in_journal_bitmap); ++ PROC_INFO_INC(sb, journal.in_journal_bitmap); + jb = journal->j_list_bitmap + i; + if (jb->journal_list && jb->bitmaps[bmap_nr] && + test_bit(bit_nr, +@@ -510,28 +510,28 @@ int reiserfs_in_journal(struct super_blo + find_next_zero_bit((unsigned long *) + (jb->bitmaps[bmap_nr]-> + data), +- p_s_sb->s_blocksize << 3, ++ sb->s_blocksize << 3, + bit_nr + 1); + return 1; + } + } + } + +- bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr; ++ bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr; + /* is it in any old transactions? */ + if (search_all + && (cn = +- get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) { ++ get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) { + return 1; + } + + /* is it in the current transaction. 
This should never happen */ +- if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) { ++ if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) { + BUG(); + return 1; + } + +- PROC_INFO_INC(p_s_sb, journal.in_journal_reusable); ++ PROC_INFO_INC(sb, journal.in_journal_reusable); + /* safe for reuse */ + return 0; + } +@@ -553,16 +553,16 @@ static inline void insert_journal_hash(s + } + + /* lock the current transaction */ +-static inline void lock_journal(struct super_block *p_s_sb) ++static inline void lock_journal(struct super_block *sb) + { +- PROC_INFO_INC(p_s_sb, journal.lock_journal); +- mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex); ++ PROC_INFO_INC(sb, journal.lock_journal); ++ mutex_lock(&SB_JOURNAL(sb)->j_mutex); + } + + /* unlock the current transaction */ +-static inline void unlock_journal(struct super_block *p_s_sb) ++static inline void unlock_journal(struct super_block *sb) + { +- mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex); ++ mutex_unlock(&SB_JOURNAL(sb)->j_mutex); + } + + static inline void get_journal_list(struct reiserfs_journal_list *jl) +@@ -586,13 +586,13 @@ static inline void put_journal_list(stru + ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a + ** transaction. + */ +-static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, ++static void cleanup_freed_for_journal_list(struct super_block *sb, + struct reiserfs_journal_list *jl) + { + + struct reiserfs_list_bitmap *jb = jl->j_list_bitmap; + if (jb) { +- cleanup_bitmap_list(p_s_sb, jb); ++ cleanup_bitmap_list(sb, jb); + } + jl->j_list_bitmap->journal_list = NULL; + jl->j_list_bitmap = NULL; +@@ -1237,11 +1237,11 @@ static void remove_journal_hash(struct s + ** journal list for this transaction. Aside from freeing the cnode, this also allows the + ** block to be reallocated for data blocks if it had been deleted. 
+ */ +-static void remove_all_from_journal_list(struct super_block *p_s_sb, ++static void remove_all_from_journal_list(struct super_block *sb, + struct reiserfs_journal_list *jl, + int debug) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + struct reiserfs_journal_cnode *cn, *last; + cn = jl->j_realblock; + +@@ -1251,18 +1251,18 @@ static void remove_all_from_journal_list + while (cn) { + if (cn->blocknr != 0) { + if (debug) { +- reiserfs_warning(p_s_sb, "reiserfs-2201", ++ reiserfs_warning(sb, "reiserfs-2201", + "block %u, bh is %d, state %ld", + cn->blocknr, cn->bh ? 1 : 0, + cn->state); + } + cn->state = 0; +- remove_journal_hash(p_s_sb, journal->j_list_hash_table, ++ remove_journal_hash(sb, journal->j_list_hash_table, + jl, cn->blocknr, 1); + } + last = cn; + cn = cn->next; +- free_cnode(p_s_sb, last); ++ free_cnode(sb, last); + } + jl->j_realblock = NULL; + } +@@ -1274,12 +1274,12 @@ static void remove_all_from_journal_list + ** called by flush_journal_list, before it calls remove_all_from_journal_list + ** + */ +-static int _update_journal_header_block(struct super_block *p_s_sb, ++static int _update_journal_header_block(struct super_block *sb, + unsigned long offset, + unsigned int trans_id) + { + struct reiserfs_journal_header *jh; +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + + if (reiserfs_is_journal_aborted(journal)) + return -EIO; +@@ -1289,7 +1289,7 @@ static int _update_journal_header_block( + wait_on_buffer((journal->j_header_bh)); + if (unlikely(!buffer_uptodate(journal->j_header_bh))) { + #ifdef CONFIG_REISERFS_CHECK +- reiserfs_warning(p_s_sb, "journal-699", ++ reiserfs_warning(sb, "journal-699", + "buffer write failed"); + #endif + return -EIO; +@@ -1303,24 +1303,24 @@ static int _update_journal_header_block( + jh->j_first_unflushed_offset = cpu_to_le32(offset); + jh->j_mount_id = 
cpu_to_le32(journal->j_mount_id); + +- if (reiserfs_barrier_flush(p_s_sb)) { ++ if (reiserfs_barrier_flush(sb)) { + int ret; + lock_buffer(journal->j_header_bh); + ret = submit_barrier_buffer(journal->j_header_bh); + if (ret == -EOPNOTSUPP) { + set_buffer_uptodate(journal->j_header_bh); +- disable_barrier(p_s_sb); ++ disable_barrier(sb); + goto sync; + } + wait_on_buffer(journal->j_header_bh); +- check_barrier_completion(p_s_sb, journal->j_header_bh); ++ check_barrier_completion(sb, journal->j_header_bh); + } else { + sync: + set_buffer_dirty(journal->j_header_bh); + sync_dirty_buffer(journal->j_header_bh); + } + if (!buffer_uptodate(journal->j_header_bh)) { +- reiserfs_warning(p_s_sb, "journal-837", ++ reiserfs_warning(sb, "journal-837", + "IO error during journal replay"); + return -EIO; + } +@@ -1328,23 +1328,23 @@ static int _update_journal_header_block( + return 0; + } + +-static int update_journal_header_block(struct super_block *p_s_sb, ++static int update_journal_header_block(struct super_block *sb, + unsigned long offset, + unsigned int trans_id) + { +- return _update_journal_header_block(p_s_sb, offset, trans_id); ++ return _update_journal_header_block(sb, offset, trans_id); + } + + /* + ** flush any and all journal lists older than you are + ** can only be called from flush_journal_list + */ +-static int flush_older_journal_lists(struct super_block *p_s_sb, ++static int flush_older_journal_lists(struct super_block *sb, + struct reiserfs_journal_list *jl) + { + struct list_head *entry; + struct reiserfs_journal_list *other_jl; +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + unsigned int trans_id = jl->j_trans_id; + + /* we know we are the only ones flushing things, no extra race +@@ -1359,7 +1359,7 @@ static int flush_older_journal_lists(str + if (other_jl->j_trans_id < trans_id) { + BUG_ON(other_jl->j_refcount <= 0); + /* do not flush all */ +- flush_journal_list(p_s_sb, other_jl, 0); ++ 
flush_journal_list(sb, other_jl, 0); + + /* other_jl is now deleted from the list */ + goto restart; +@@ -1908,22 +1908,22 @@ void remove_journal_hash(struct super_bl + } + } + +-static void free_journal_ram(struct super_block *p_s_sb) ++static void free_journal_ram(struct super_block *sb) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + kfree(journal->j_current_jl); + journal->j_num_lists--; + + vfree(journal->j_cnode_free_orig); +- free_list_bitmaps(p_s_sb, journal->j_list_bitmap); +- free_bitmap_nodes(p_s_sb); /* must be after free_list_bitmaps */ ++ free_list_bitmaps(sb, journal->j_list_bitmap); ++ free_bitmap_nodes(sb); /* must be after free_list_bitmaps */ + if (journal->j_header_bh) { + brelse(journal->j_header_bh); + } + /* j_header_bh is on the journal dev, make sure not to release the journal + * dev until we brelse j_header_bh + */ +- release_journal_dev(p_s_sb, journal); ++ release_journal_dev(sb, journal); + vfree(journal); + } + +@@ -1932,27 +1932,27 @@ static void free_journal_ram(struct supe + ** of read_super() yet. Any other caller must keep error at 0. 
+ */ + static int do_journal_release(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, int error) ++ struct super_block *sb, int error) + { + struct reiserfs_transaction_handle myth; + int flushed = 0; +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + + /* we only want to flush out transactions if we were called with error == 0 + */ +- if (!error && !(p_s_sb->s_flags & MS_RDONLY)) { ++ if (!error && !(sb->s_flags & MS_RDONLY)) { + /* end the current trans */ + BUG_ON(!th->t_trans_id); +- do_journal_end(th, p_s_sb, 10, FLUSH_ALL); ++ do_journal_end(th, sb, 10, FLUSH_ALL); + + /* make sure something gets logged to force our way into the flush code */ +- if (!journal_join(&myth, p_s_sb, 1)) { +- reiserfs_prepare_for_journal(p_s_sb, +- SB_BUFFER_WITH_SB(p_s_sb), ++ if (!journal_join(&myth, sb, 1)) { ++ reiserfs_prepare_for_journal(sb, ++ SB_BUFFER_WITH_SB(sb), + 1); +- journal_mark_dirty(&myth, p_s_sb, +- SB_BUFFER_WITH_SB(p_s_sb)); +- do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL); ++ journal_mark_dirty(&myth, sb, ++ SB_BUFFER_WITH_SB(sb)); ++ do_journal_end(&myth, sb, 1, FLUSH_ALL); + flushed = 1; + } + } +@@ -1960,26 +1960,26 @@ static int do_journal_release(struct rei + /* this also catches errors during the do_journal_end above */ + if (!error && reiserfs_is_journal_aborted(journal)) { + memset(&myth, 0, sizeof(myth)); +- if (!journal_join_abort(&myth, p_s_sb, 1)) { +- reiserfs_prepare_for_journal(p_s_sb, +- SB_BUFFER_WITH_SB(p_s_sb), ++ if (!journal_join_abort(&myth, sb, 1)) { ++ reiserfs_prepare_for_journal(sb, ++ SB_BUFFER_WITH_SB(sb), + 1); +- journal_mark_dirty(&myth, p_s_sb, +- SB_BUFFER_WITH_SB(p_s_sb)); +- do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL); ++ journal_mark_dirty(&myth, sb, ++ SB_BUFFER_WITH_SB(sb)); ++ do_journal_end(&myth, sb, 1, FLUSH_ALL); + } + } + + reiserfs_mounted_fs_count--; + /* wait for all commits to finish */ +- 
cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work); ++ cancel_delayed_work(&SB_JOURNAL(sb)->j_work); + flush_workqueue(commit_wq); + if (!reiserfs_mounted_fs_count) { + destroy_workqueue(commit_wq); + commit_wq = NULL; + } + +- free_journal_ram(p_s_sb); ++ free_journal_ram(sb); + + return 0; + } +@@ -1988,28 +1988,28 @@ static int do_journal_release(struct rei + ** call on unmount. flush all journal trans, release all alloc'd ram + */ + int journal_release(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb) ++ struct super_block *sb) + { +- return do_journal_release(th, p_s_sb, 0); ++ return do_journal_release(th, sb, 0); + } + + /* + ** only call from an error condition inside reiserfs_read_super! + */ + int journal_release_error(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb) ++ struct super_block *sb) + { +- return do_journal_release(th, p_s_sb, 1); ++ return do_journal_release(th, sb, 1); + } + + /* compares description block with commit block. 
returns 1 if they differ, 0 if they are the same */ +-static int journal_compare_desc_commit(struct super_block *p_s_sb, ++static int journal_compare_desc_commit(struct super_block *sb, + struct reiserfs_journal_desc *desc, + struct reiserfs_journal_commit *commit) + { + if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || + get_commit_trans_len(commit) != get_desc_trans_len(desc) || +- get_commit_trans_len(commit) > SB_JOURNAL(p_s_sb)->j_trans_max || ++ get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max || + get_commit_trans_len(commit) <= 0) { + return 1; + } +@@ -2020,7 +2020,7 @@ static int journal_compare_desc_commit(s + ** returns -1 if it found a corrupt commit block + ** returns 1 if both desc and commit were valid + */ +-static int journal_transaction_is_valid(struct super_block *p_s_sb, ++static int journal_transaction_is_valid(struct super_block *sb, + struct buffer_head *d_bh, + unsigned int *oldest_invalid_trans_id, + unsigned long *newest_mount_id) +@@ -2038,7 +2038,7 @@ static int journal_transaction_is_valid( + && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { + if (oldest_invalid_trans_id && *oldest_invalid_trans_id + && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal-986: transaction " + "is valid returning because trans_id %d is greater than " + "oldest_invalid %lu", +@@ -2048,7 +2048,7 @@ static int journal_transaction_is_valid( + } + if (newest_mount_id + && *newest_mount_id > get_desc_mount_id(desc)) { +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal-1087: transaction " + "is valid returning because mount_id %d is less than " + "newest_mount_id %lu", +@@ -2056,37 +2056,37 @@ static int journal_transaction_is_valid( + *newest_mount_id); + return -1; + } +- if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) { +- reiserfs_warning(p_s_sb, 
"journal-2018", ++ if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) { ++ reiserfs_warning(sb, "journal-2018", + "Bad transaction length %d " + "encountered, ignoring transaction", + get_desc_trans_len(desc)); + return -1; + } +- offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); ++ offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); + + /* ok, we have a journal description block, lets see if the transaction was valid */ + c_bh = +- journal_bread(p_s_sb, +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ++ journal_bread(sb, ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb) + + ((offset + get_desc_trans_len(desc) + +- 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); ++ 1) % SB_ONDISK_JOURNAL_SIZE(sb))); + if (!c_bh) + return 0; + commit = (struct reiserfs_journal_commit *)c_bh->b_data; +- if (journal_compare_desc_commit(p_s_sb, desc, commit)) { +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ if (journal_compare_desc_commit(sb, desc, commit)) { ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal_transaction_is_valid, commit offset %ld had bad " + "time %d or length %d", + c_bh->b_blocknr - +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb), + get_commit_trans_id(commit), + get_commit_trans_len(commit)); + brelse(c_bh); + if (oldest_invalid_trans_id) { + *oldest_invalid_trans_id = + get_desc_trans_id(desc); +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal-1004: " + "transaction_is_valid setting oldest invalid trans_id " + "to %d", +@@ -2095,11 +2095,11 @@ static int journal_transaction_is_valid( + return -1; + } + brelse(c_bh); +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal-1006: found valid " + "transaction start offset %llu, len %d id %d", + d_bh->b_blocknr - +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb), + get_desc_trans_len(desc), + get_desc_trans_id(desc)); + return 1; +@@ -2121,13 +2121,13 @@ static void 
brelse_array(struct buffer_h + ** this either reads in a replays a transaction, or returns because the transaction + ** is invalid, or too old. + */ +-static int journal_read_transaction(struct super_block *p_s_sb, ++static int journal_read_transaction(struct super_block *sb, + unsigned long cur_dblock, + unsigned long oldest_start, + unsigned int oldest_trans_id, + unsigned long newest_mount_id) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + struct reiserfs_journal_desc *desc; + struct reiserfs_journal_commit *commit; + unsigned int trans_id = 0; +@@ -2139,45 +2139,45 @@ static int journal_read_transaction(stru + int i; + int trans_half; + +- d_bh = journal_bread(p_s_sb, cur_dblock); ++ d_bh = journal_bread(sb, cur_dblock); + if (!d_bh) + return 1; + desc = (struct reiserfs_journal_desc *)d_bh->b_data; +- trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: " ++ trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: " + "journal_read_transaction, offset %llu, len %d mount_id %d", +- d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), ++ d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb), + get_desc_trans_len(desc), get_desc_mount_id(desc)); + if (get_desc_trans_id(desc) < oldest_trans_id) { +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: " ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: " + "journal_read_trans skipping because %lu is too old", + cur_dblock - +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)); ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb)); + brelse(d_bh); + return 1; + } + if (get_desc_mount_id(desc) != newest_mount_id) { +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: " ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: " + "journal_read_trans skipping because %d is != " + "newest_mount_id %lu", 
get_desc_mount_id(desc), + newest_mount_id); + brelse(d_bh); + return 1; + } +- c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ++ c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + + ((trans_offset + get_desc_trans_len(desc) + 1) % +- SB_ONDISK_JOURNAL_SIZE(p_s_sb))); ++ SB_ONDISK_JOURNAL_SIZE(sb))); + if (!c_bh) { + brelse(d_bh); + return 1; + } + commit = (struct reiserfs_journal_commit *)c_bh->b_data; +- if (journal_compare_desc_commit(p_s_sb, desc, commit)) { +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ if (journal_compare_desc_commit(sb, desc, commit)) { ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal_read_transaction, " + "commit offset %llu had bad time %d or length %d", + c_bh->b_blocknr - +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb), + get_commit_trans_id(commit), + get_commit_trans_len(commit)); + brelse(c_bh); +@@ -2195,30 +2195,30 @@ static int journal_read_transaction(stru + brelse(d_bh); + kfree(log_blocks); + kfree(real_blocks); +- reiserfs_warning(p_s_sb, "journal-1169", ++ reiserfs_warning(sb, "journal-1169", + "kmalloc failed, unable to mount FS"); + return -1; + } + /* get all the buffer heads */ +- trans_half = journal_trans_half(p_s_sb->s_blocksize); ++ trans_half = journal_trans_half(sb->s_blocksize); + for (i = 0; i < get_desc_trans_len(desc); i++) { + log_blocks[i] = +- journal_getblk(p_s_sb, +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ++ journal_getblk(sb, ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb) + + (trans_offset + 1 + +- i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); ++ i) % SB_ONDISK_JOURNAL_SIZE(sb)); + if (i < trans_half) { + real_blocks[i] = +- sb_getblk(p_s_sb, ++ sb_getblk(sb, + le32_to_cpu(desc->j_realblock[i])); + } else { + real_blocks[i] = +- sb_getblk(p_s_sb, ++ sb_getblk(sb, + le32_to_cpu(commit-> + j_realblock[i - trans_half])); + } +- if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { +- reiserfs_warning(p_s_sb, "journal-1207", ++ if (real_blocks[i]->b_blocknr > 
SB_BLOCK_COUNT(sb)) { ++ reiserfs_warning(sb, "journal-1207", + "REPLAY FAILURE fsck required! " + "Block to replay is outside of " + "filesystem"); +@@ -2226,8 +2226,8 @@ static int journal_read_transaction(stru + } + /* make sure we don't try to replay onto log or reserved area */ + if (is_block_in_log_or_reserved_area +- (p_s_sb, real_blocks[i]->b_blocknr)) { +- reiserfs_warning(p_s_sb, "journal-1204", ++ (sb, real_blocks[i]->b_blocknr)) { ++ reiserfs_warning(sb, "journal-1204", + "REPLAY FAILURE fsck required! " + "Trying to replay onto a log block"); + abort_replay: +@@ -2245,7 +2245,7 @@ static int journal_read_transaction(stru + for (i = 0; i < get_desc_trans_len(desc); i++) { + wait_on_buffer(log_blocks[i]); + if (!buffer_uptodate(log_blocks[i])) { +- reiserfs_warning(p_s_sb, "journal-1212", ++ reiserfs_warning(sb, "journal-1212", + "REPLAY FAILURE fsck required! " + "buffer write failed"); + brelse_array(log_blocks + i, +@@ -2270,7 +2270,7 @@ static int journal_read_transaction(stru + for (i = 0; i < get_desc_trans_len(desc); i++) { + wait_on_buffer(real_blocks[i]); + if (!buffer_uptodate(real_blocks[i])) { +- reiserfs_warning(p_s_sb, "journal-1226", ++ reiserfs_warning(sb, "journal-1226", + "REPLAY FAILURE, fsck required! " + "buffer write failed"); + brelse_array(real_blocks + i, +@@ -2284,15 +2284,15 @@ static int journal_read_transaction(stru + brelse(real_blocks[i]); + } + cur_dblock = +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb) + + ((trans_offset + get_desc_trans_len(desc) + +- 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ 2) % SB_ONDISK_JOURNAL_SIZE(sb)); ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal-1095: setting journal " "start to offset %ld", +- cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)); ++ cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); + + /* init starting values for the first transaction, in case this is the last transaction to be replayed. 
*/ +- journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); ++ journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb); + journal->j_last_flush_trans_id = trans_id; + journal->j_trans_id = trans_id + 1; + /* check for trans_id overflow */ +@@ -2357,9 +2357,9 @@ static struct buffer_head *reiserfs_brea + ** + ** On exit, it sets things up so the first transaction will work correctly. + */ +-static int journal_read(struct super_block *p_s_sb) ++static int journal_read(struct super_block *sb) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + struct reiserfs_journal_desc *desc; + unsigned int oldest_trans_id = 0; + unsigned int oldest_invalid_trans_id = 0; +@@ -2375,8 +2375,8 @@ static int journal_read(struct super_blo + int ret; + char b[BDEVNAME_SIZE]; + +- cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); +- reiserfs_info(p_s_sb, "checking transaction log (%s)\n", ++ cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb); ++ reiserfs_info(sb, "checking transaction log (%s)\n", + bdevname(journal->j_dev_bd, b)); + start = get_seconds(); + +@@ -2384,22 +2384,22 @@ static int journal_read(struct super_blo + ** is the first unflushed, and if that transaction is not valid, + ** replay is done + */ +- journal->j_header_bh = journal_bread(p_s_sb, +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +- + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); ++ journal->j_header_bh = journal_bread(sb, ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb) ++ + SB_ONDISK_JOURNAL_SIZE(sb)); + if (!journal->j_header_bh) { + return 1; + } + jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); + if (le32_to_cpu(jh->j_first_unflushed_offset) < +- SB_ONDISK_JOURNAL_SIZE(p_s_sb) ++ SB_ONDISK_JOURNAL_SIZE(sb) + && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { + oldest_start = +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb) + + le32_to_cpu(jh->j_first_unflushed_offset); + oldest_trans_id = 
le32_to_cpu(jh->j_last_flush_trans_id) + 1; + newest_mount_id = le32_to_cpu(jh->j_mount_id); +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal-1153: found in " + "header: first_unflushed_offset %d, last_flushed_trans_id " + "%lu", le32_to_cpu(jh->j_first_unflushed_offset), +@@ -2411,10 +2411,10 @@ static int journal_read(struct super_blo + ** through the whole log. + */ + d_bh = +- journal_bread(p_s_sb, +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ++ journal_bread(sb, ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb) + + le32_to_cpu(jh->j_first_unflushed_offset)); +- ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL); ++ ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL); + if (!ret) { + continue_replay = 0; + } +@@ -2422,8 +2422,8 @@ static int journal_read(struct super_blo + goto start_log_replay; + } + +- if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) { +- reiserfs_warning(p_s_sb, "clm-2076", ++ if (continue_replay && bdev_read_only(sb->s_bdev)) { ++ reiserfs_warning(sb, "clm-2076", + "device is readonly, unable to replay log"); + return -1; + } +@@ -2433,17 +2433,17 @@ static int journal_read(struct super_blo + */ + while (continue_replay + && cur_dblock < +- (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + +- SB_ONDISK_JOURNAL_SIZE(p_s_sb))) { ++ (SB_ONDISK_JOURNAL_1st_BLOCK(sb) + ++ SB_ONDISK_JOURNAL_SIZE(sb))) { + /* Note that it is required for blocksize of primary fs device and journal + device to be the same */ + d_bh = + reiserfs_breada(journal->j_dev_bd, cur_dblock, +- p_s_sb->s_blocksize, +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + +- SB_ONDISK_JOURNAL_SIZE(p_s_sb)); ++ sb->s_blocksize, ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb) + ++ SB_ONDISK_JOURNAL_SIZE(sb)); + ret = +- journal_transaction_is_valid(p_s_sb, d_bh, ++ journal_transaction_is_valid(sb, d_bh, + &oldest_invalid_trans_id, + &newest_mount_id); + if (ret == 1) { +@@ -2452,26 +2452,26 @@ static int journal_read(struct super_blo + oldest_trans_id = 
get_desc_trans_id(desc); + oldest_start = d_bh->b_blocknr; + newest_mount_id = get_desc_mount_id(desc); +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal-1179: Setting " + "oldest_start to offset %llu, trans_id %lu", + oldest_start - + SB_ONDISK_JOURNAL_1st_BLOCK +- (p_s_sb), oldest_trans_id); ++ (sb), oldest_trans_id); + } else if (oldest_trans_id > get_desc_trans_id(desc)) { + /* one we just read was older */ + oldest_trans_id = get_desc_trans_id(desc); + oldest_start = d_bh->b_blocknr; +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal-1180: Resetting " + "oldest_start to offset %lu, trans_id %lu", + oldest_start - + SB_ONDISK_JOURNAL_1st_BLOCK +- (p_s_sb), oldest_trans_id); ++ (sb), oldest_trans_id); + } + if (newest_mount_id < get_desc_mount_id(desc)) { + newest_mount_id = get_desc_mount_id(desc); +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal-1299: Setting " + "newest_mount_id to %d", + get_desc_mount_id(desc)); +@@ -2486,17 +2486,17 @@ static int journal_read(struct super_blo + start_log_replay: + cur_dblock = oldest_start; + if (oldest_trans_id) { +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal-1206: Starting replay " + "from offset %llu, trans_id %lu", +- cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), ++ cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb), + oldest_trans_id); + + } + replay_count = 0; + while (continue_replay && oldest_trans_id > 0) { + ret = +- journal_read_transaction(p_s_sb, cur_dblock, oldest_start, ++ journal_read_transaction(sb, cur_dblock, oldest_start, + oldest_trans_id, newest_mount_id); + if (ret < 0) { + return ret; +@@ -2504,14 +2504,14 @@ static int journal_read(struct super_blo + break; + } + cur_dblock = +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start; ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start; + 
replay_count++; + if (cur_dblock == oldest_start) + break; + } + + if (oldest_trans_id == 0) { +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "journal-1225: No valid " "transactions found"); + } + /* j_start does not get set correctly if we don't replay any transactions. +@@ -2531,16 +2531,16 @@ static int journal_read(struct super_blo + } else { + journal->j_mount_id = newest_mount_id + 1; + } +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " + "newest_mount_id to %lu", journal->j_mount_id); + journal->j_first_unflushed_offset = journal->j_start; + if (replay_count > 0) { +- reiserfs_info(p_s_sb, ++ reiserfs_info(sb, + "replayed %d transactions in %lu seconds\n", + replay_count, get_seconds() - start); + } +- if (!bdev_read_only(p_s_sb->s_bdev) && +- _update_journal_header_block(p_s_sb, journal->j_start, ++ if (!bdev_read_only(sb->s_bdev) && ++ _update_journal_header_block(sb, journal->j_start, + journal->j_last_flush_trans_id)) { + /* replay failed, caller must call free_journal_ram and abort + ** the mount +@@ -2565,9 +2565,9 @@ static struct reiserfs_journal_list *all + return jl; + } + +-static void journal_list_init(struct super_block *p_s_sb) ++static void journal_list_init(struct super_block *sb) + { +- SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb); ++ SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb); + } + + static int release_journal_dev(struct super_block *super, +@@ -2663,28 +2663,28 @@ static int journal_init_dev(struct super + */ + #define REISERFS_STANDARD_BLKSIZE (4096) + +-static int check_advise_trans_params(struct super_block *p_s_sb, ++static int check_advise_trans_params(struct super_block *sb, + struct reiserfs_journal *journal) + { + if (journal->j_trans_max) { + /* Non-default journal params. + Do sanity check for them. 
*/ + int ratio = 1; +- if (p_s_sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) +- ratio = REISERFS_STANDARD_BLKSIZE / p_s_sb->s_blocksize; ++ if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) ++ ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize; + + if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio || + journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio || +- SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max < ++ SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max < + JOURNAL_MIN_RATIO) { +- reiserfs_warning(p_s_sb, "sh-462", ++ reiserfs_warning(sb, "sh-462", + "bad transaction max size (%u). " + "FSCK?", journal->j_trans_max); + return 1; + } + if (journal->j_max_batch != (journal->j_trans_max) * + JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) { +- reiserfs_warning(p_s_sb, "sh-463", ++ reiserfs_warning(sb, "sh-463", + "bad transaction max batch (%u). " + "FSCK?", journal->j_max_batch); + return 1; +@@ -2694,9 +2694,9 @@ static int check_advise_trans_params(str + The file system was created by old version + of mkreiserfs, so some fields contain zeros, + and we need to advise proper values for them */ +- if (p_s_sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { +- reiserfs_warning(p_s_sb, "sh-464", "bad blocksize (%u)", +- p_s_sb->s_blocksize); ++ if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { ++ reiserfs_warning(sb, "sh-464", "bad blocksize (%u)", ++ sb->s_blocksize); + return 1; + } + journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; +@@ -2709,10 +2709,10 @@ static int check_advise_trans_params(str + /* + ** must be called once on fs mount. 
calls journal_read for you + */ +-int journal_init(struct super_block *p_s_sb, const char *j_dev_name, ++int journal_init(struct super_block *sb, const char *j_dev_name, + int old_format, unsigned int commit_max_age) + { +- int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2; ++ int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2; + struct buffer_head *bhjh; + struct reiserfs_super_block *rs; + struct reiserfs_journal_header *jh; +@@ -2720,9 +2720,9 @@ int journal_init(struct super_block *p_s + struct reiserfs_journal_list *jl; + char b[BDEVNAME_SIZE]; + +- journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal)); ++ journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal)); + if (!journal) { +- reiserfs_warning(p_s_sb, "journal-1256", ++ reiserfs_warning(sb, "journal-1256", + "unable to get memory for journal structure"); + return 1; + } +@@ -2732,50 +2732,50 @@ int journal_init(struct super_block *p_s + INIT_LIST_HEAD(&journal->j_working_list); + INIT_LIST_HEAD(&journal->j_journal_list); + journal->j_persistent_trans = 0; +- if (reiserfs_allocate_list_bitmaps(p_s_sb, ++ if (reiserfs_allocate_list_bitmaps(sb, + journal->j_list_bitmap, +- reiserfs_bmap_count(p_s_sb))) ++ reiserfs_bmap_count(sb))) + goto free_and_return; +- allocate_bitmap_nodes(p_s_sb); ++ allocate_bitmap_nodes(sb); + + /* reserved for journal area support */ +- SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? ++ SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ? 
+ REISERFS_OLD_DISK_OFFSET_IN_BYTES +- / p_s_sb->s_blocksize + +- reiserfs_bmap_count(p_s_sb) + ++ / sb->s_blocksize + ++ reiserfs_bmap_count(sb) + + 1 : + REISERFS_DISK_OFFSET_IN_BYTES / +- p_s_sb->s_blocksize + 2); ++ sb->s_blocksize + 2); + + /* Sanity check to see is the standard journal fitting withing first bitmap + (actual for small blocksizes) */ +- if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) && +- (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + +- SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) { +- reiserfs_warning(p_s_sb, "journal-1393", ++ if (!SB_ONDISK_JOURNAL_DEVICE(sb) && ++ (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + ++ SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) { ++ reiserfs_warning(sb, "journal-1393", + "journal does not fit for area addressed " + "by first of bitmap blocks. It starts at " + "%u and its size is %u. Block size %ld", +- SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb), +- SB_ONDISK_JOURNAL_SIZE(p_s_sb), +- p_s_sb->s_blocksize); ++ SB_JOURNAL_1st_RESERVED_BLOCK(sb), ++ SB_ONDISK_JOURNAL_SIZE(sb), ++ sb->s_blocksize); + goto free_and_return; + } + +- if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) { +- reiserfs_warning(p_s_sb, "sh-462", ++ if (journal_init_dev(sb, journal, j_dev_name) != 0) { ++ reiserfs_warning(sb, "sh-462", + "unable to initialize jornal device"); + goto free_and_return; + } + +- rs = SB_DISK_SUPER_BLOCK(p_s_sb); ++ rs = SB_DISK_SUPER_BLOCK(sb); + + /* read journal header */ +- bhjh = journal_bread(p_s_sb, +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + +- SB_ONDISK_JOURNAL_SIZE(p_s_sb)); ++ bhjh = journal_bread(sb, ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb) + ++ SB_ONDISK_JOURNAL_SIZE(sb)); + if (!bhjh) { +- reiserfs_warning(p_s_sb, "sh-459", ++ reiserfs_warning(sb, "sh-459", + "unable to read journal header"); + goto free_and_return; + } +@@ -2785,7 +2785,7 @@ int journal_init(struct super_block *p_s + if (is_reiserfs_jr(rs) + && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != + sb_jp_journal_magic(rs))) { +- 
reiserfs_warning(p_s_sb, "sh-460", ++ reiserfs_warning(sb, "sh-460", + "journal header magic %x (device %s) does " + "not match to magic found in super block %x", + jh->jh_journal.jp_journal_magic, +@@ -2801,7 +2801,7 @@ int journal_init(struct super_block *p_s + le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); + journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; + +- if (check_advise_trans_params(p_s_sb, journal) != 0) ++ if (check_advise_trans_params(sb, journal) != 0) + goto free_and_return; + journal->j_default_max_commit_age = journal->j_max_commit_age; + +@@ -2810,12 +2810,12 @@ int journal_init(struct super_block *p_s + journal->j_max_trans_age = commit_max_age; + } + +- reiserfs_info(p_s_sb, "journal params: device %s, size %u, " ++ reiserfs_info(sb, "journal params: device %s, size %u, " + "journal first block %u, max trans len %u, max batch %u, " + "max commit age %u, max trans age %u\n", + bdevname(journal->j_dev_bd, b), +- SB_ONDISK_JOURNAL_SIZE(p_s_sb), +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), ++ SB_ONDISK_JOURNAL_SIZE(sb), ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb), + journal->j_trans_max, + journal->j_max_batch, + journal->j_max_commit_age, journal->j_max_trans_age); +@@ -2823,7 +2823,7 @@ int journal_init(struct super_block *p_s + brelse(bhjh); + + journal->j_list_bitmap_index = 0; +- journal_list_init(p_s_sb); ++ journal_list_init(sb); + + memset(journal->j_list_hash_table, 0, + JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); +@@ -2855,7 +2855,7 @@ int journal_init(struct super_block *p_s + journal->j_must_wait = 0; + + if (journal->j_cnode_free == 0) { +- reiserfs_warning(p_s_sb, "journal-2004", "Journal cnode memory " ++ reiserfs_warning(sb, "journal-2004", "Journal cnode memory " + "allocation failed (%ld bytes). Journal is " + "too large for available memory. 
Usually " + "this is due to a journal that is too large.", +@@ -2863,16 +2863,16 @@ int journal_init(struct super_block *p_s + goto free_and_return; + } + +- init_journal_hash(p_s_sb); ++ init_journal_hash(sb); + jl = journal->j_current_jl; +- jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl); ++ jl->j_list_bitmap = get_list_bitmap(sb, jl); + if (!jl->j_list_bitmap) { +- reiserfs_warning(p_s_sb, "journal-2005", ++ reiserfs_warning(sb, "journal-2005", + "get_list_bitmap failed for journal list 0"); + goto free_and_return; + } +- if (journal_read(p_s_sb) < 0) { +- reiserfs_warning(p_s_sb, "reiserfs-2006", ++ if (journal_read(sb) < 0) { ++ reiserfs_warning(sb, "reiserfs-2006", + "Replay Failure, unable to mount"); + goto free_and_return; + } +@@ -2882,10 +2882,10 @@ int journal_init(struct super_block *p_s + commit_wq = create_workqueue("reiserfs"); + + INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); +- journal->j_work_sb = p_s_sb; ++ journal->j_work_sb = sb; + return 0; + free_and_return: +- free_journal_ram(p_s_sb); ++ free_journal_ram(sb); + return 1; + } + +@@ -3001,37 +3001,37 @@ static void let_transaction_grow(struct + ** expect to use in nblocks. 
+ */ + static int do_journal_begin_r(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, unsigned long nblocks, ++ struct super_block *sb, unsigned long nblocks, + int join) + { + time_t now = get_seconds(); + unsigned int old_trans_id; +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + struct reiserfs_transaction_handle myth; + int sched_count = 0; + int retval; + +- reiserfs_check_lock_depth(p_s_sb, "journal_begin"); ++ reiserfs_check_lock_depth(sb, "journal_begin"); + BUG_ON(nblocks > journal->j_trans_max); + +- PROC_INFO_INC(p_s_sb, journal.journal_being); ++ PROC_INFO_INC(sb, journal.journal_being); + /* set here for journal_join */ + th->t_refcount = 1; +- th->t_super = p_s_sb; ++ th->t_super = sb; + + relock: +- lock_journal(p_s_sb); ++ lock_journal(sb); + if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { +- unlock_journal(p_s_sb); ++ unlock_journal(sb); + retval = journal->j_errno; + goto out_fail; + } + journal->j_bcount++; + + if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { +- unlock_journal(p_s_sb); +- reiserfs_wait_on_write_block(p_s_sb); +- PROC_INFO_INC(p_s_sb, journal.journal_relock_writers); ++ unlock_journal(sb); ++ reiserfs_wait_on_write_block(sb); ++ PROC_INFO_INC(sb, journal.journal_relock_writers); + goto relock; + } + now = get_seconds(); +@@ -3052,7 +3052,7 @@ static int do_journal_begin_r(struct rei + || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { + + old_trans_id = journal->j_trans_id; +- unlock_journal(p_s_sb); /* allow others to finish this transaction */ ++ unlock_journal(sb); /* allow others to finish this transaction */ + + if (!join && (journal->j_len_alloc + nblocks + 2) >= + journal->j_max_batch && +@@ -3060,7 +3060,7 @@ static int do_journal_begin_r(struct rei + (journal->j_len_alloc * 75)) { + if (atomic_read(&journal->j_wcount) > 10) { + sched_count++; +- queue_log_writer(p_s_sb); ++ queue_log_writer(sb); 
+ goto relock; + } + } +@@ -3070,25 +3070,25 @@ static int do_journal_begin_r(struct rei + if (atomic_read(&journal->j_jlock)) { + while (journal->j_trans_id == old_trans_id && + atomic_read(&journal->j_jlock)) { +- queue_log_writer(p_s_sb); ++ queue_log_writer(sb); + } + goto relock; + } +- retval = journal_join(&myth, p_s_sb, 1); ++ retval = journal_join(&myth, sb, 1); + if (retval) + goto out_fail; + + /* someone might have ended the transaction while we joined */ + if (old_trans_id != journal->j_trans_id) { +- retval = do_journal_end(&myth, p_s_sb, 1, 0); ++ retval = do_journal_end(&myth, sb, 1, 0); + } else { +- retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW); ++ retval = do_journal_end(&myth, sb, 1, COMMIT_NOW); + } + + if (retval) + goto out_fail; + +- PROC_INFO_INC(p_s_sb, journal.journal_relock_wcount); ++ PROC_INFO_INC(sb, journal.journal_relock_wcount); + goto relock; + } + /* we are the first writer, set trans_id */ +@@ -3100,7 +3100,7 @@ static int do_journal_begin_r(struct rei + th->t_blocks_logged = 0; + th->t_blocks_allocated = nblocks; + th->t_trans_id = journal->j_trans_id; +- unlock_journal(p_s_sb); ++ unlock_journal(sb); + INIT_LIST_HEAD(&th->t_list); + get_fs_excl(); + return 0; +@@ -3110,7 +3110,7 @@ static int do_journal_begin_r(struct rei + /* Re-set th->t_super, so we can properly keep track of how many + * persistent transactions there are. 
We need to do this so if this + * call is part of a failed restart_transaction, we can free it later */ +- th->t_super = p_s_sb; ++ th->t_super = sb; + return retval; + } + +@@ -3161,7 +3161,7 @@ int reiserfs_end_persistent_transaction( + } + + static int journal_join(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, unsigned long nblocks) ++ struct super_block *sb, unsigned long nblocks) + { + struct reiserfs_transaction_handle *cur_th = current->journal_info; + +@@ -3170,11 +3170,11 @@ static int journal_join(struct reiserfs_ + */ + th->t_handle_save = cur_th; + BUG_ON(cur_th && cur_th->t_refcount > 1); +- return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN); ++ return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN); + } + + int journal_join_abort(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, unsigned long nblocks) ++ struct super_block *sb, unsigned long nblocks) + { + struct reiserfs_transaction_handle *cur_th = current->journal_info; + +@@ -3183,11 +3183,11 @@ int journal_join_abort(struct reiserfs_t + */ + th->t_handle_save = cur_th; + BUG_ON(cur_th && cur_th->t_refcount > 1); +- return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT); ++ return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT); + } + + int journal_begin(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, unsigned long nblocks) ++ struct super_block *sb, unsigned long nblocks) + { + struct reiserfs_transaction_handle *cur_th = current->journal_info; + int ret; +@@ -3195,12 +3195,12 @@ int journal_begin(struct reiserfs_transa + th->t_handle_save = NULL; + if (cur_th) { + /* we are nesting into the current transaction */ +- if (cur_th->t_super == p_s_sb) { ++ if (cur_th->t_super == sb) { + BUG_ON(!cur_th->t_refcount); + cur_th->t_refcount++; + memcpy(th, cur_th, sizeof(*th)); + if (th->t_refcount <= 1) +- reiserfs_warning(p_s_sb, "reiserfs-2005", ++ reiserfs_warning(sb, "reiserfs-2005", + "BAD: refcount <= 1, but " + 
"journal_info != 0"); + return 0; +@@ -3209,7 +3209,7 @@ int journal_begin(struct reiserfs_transa + ** save it and restore on journal_end. This should never + ** really happen... + */ +- reiserfs_warning(p_s_sb, "clm-2100", ++ reiserfs_warning(sb, "clm-2100", + "nesting info a different FS"); + th->t_handle_save = current->journal_info; + current->journal_info = th; +@@ -3217,7 +3217,7 @@ int journal_begin(struct reiserfs_transa + } else { + current->journal_info = th; + } +- ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG); ++ ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG); + BUG_ON(current->journal_info != th); + + /* I guess this boils down to being the reciprocal of clm-2100 above. +@@ -3241,28 +3241,28 @@ int journal_begin(struct reiserfs_transa + ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. + */ + int journal_mark_dirty(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, struct buffer_head *bh) ++ struct super_block *sb, struct buffer_head *bh) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + struct reiserfs_journal_cnode *cn = NULL; + int count_already_incd = 0; + int prepared = 0; + BUG_ON(!th->t_trans_id); + +- PROC_INFO_INC(p_s_sb, journal.mark_dirty); ++ PROC_INFO_INC(sb, journal.mark_dirty); + if (th->t_trans_id != journal->j_trans_id) { + reiserfs_panic(th->t_super, "journal-1577", + "handle trans id %ld != current trans id %ld", + th->t_trans_id, journal->j_trans_id); + } + +- p_s_sb->s_dirt = 1; ++ sb->s_dirt = 1; + + prepared = test_clear_buffer_journal_prepared(bh); + clear_buffer_journal_restore_dirty(bh); + /* already in this transaction, we are done */ + if (buffer_journaled(bh)) { +- PROC_INFO_INC(p_s_sb, journal.mark_dirty_already); ++ PROC_INFO_INC(sb, journal.mark_dirty_already); + return 0; + } + +@@ -3271,7 +3271,7 @@ int journal_mark_dirty(struct reiserfs_t + ** could get to disk too early. NOT GOOD. 
+ */ + if (!prepared || buffer_dirty(bh)) { +- reiserfs_warning(p_s_sb, "journal-1777", ++ reiserfs_warning(sb, "journal-1777", + "buffer %llu bad state " + "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", + (unsigned long long)bh->b_blocknr, +@@ -3282,7 +3282,7 @@ int journal_mark_dirty(struct reiserfs_t + } + + if (atomic_read(&(journal->j_wcount)) <= 0) { +- reiserfs_warning(p_s_sb, "journal-1409", ++ reiserfs_warning(sb, "journal-1409", + "returning because j_wcount was %d", + atomic_read(&(journal->j_wcount))); + return 1; +@@ -3298,7 +3298,7 @@ int journal_mark_dirty(struct reiserfs_t + + if (buffer_journal_dirty(bh)) { + count_already_incd = 1; +- PROC_INFO_INC(p_s_sb, journal.mark_dirty_notjournal); ++ PROC_INFO_INC(sb, journal.mark_dirty_notjournal); + clear_buffer_journal_dirty(bh); + } + +@@ -3310,10 +3310,9 @@ int journal_mark_dirty(struct reiserfs_t + + /* now put this guy on the end */ + if (!cn) { +- cn = get_cnode(p_s_sb); ++ cn = get_cnode(sb); + if (!cn) { +- reiserfs_panic(p_s_sb, "journal-4", +- "get_cnode failed!"); ++ reiserfs_panic(sb, "journal-4", "get_cnode failed!"); + } + + if (th->t_blocks_logged == th->t_blocks_allocated) { +@@ -3325,7 +3324,7 @@ int journal_mark_dirty(struct reiserfs_t + + cn->bh = bh; + cn->blocknr = bh->b_blocknr; +- cn->sb = p_s_sb; ++ cn->sb = sb; + cn->jlist = NULL; + insert_journal_hash(journal->j_hash_table, cn); + if (!count_already_incd) { +@@ -3346,10 +3345,10 @@ int journal_mark_dirty(struct reiserfs_t + } + + int journal_end(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, unsigned long nblocks) ++ struct super_block *sb, unsigned long nblocks) + { + if (!current->journal_info && th->t_refcount > 1) +- reiserfs_warning(p_s_sb, "REISER-NESTING", ++ reiserfs_warning(sb, "REISER-NESTING", + "th NULL, refcount %d", th->t_refcount); + + if (!th->t_trans_id) { +@@ -3373,7 +3372,7 @@ int journal_end(struct reiserfs_transact + } + return 0; + } else { +- return do_journal_end(th, p_s_sb, nblocks, 
0); ++ return do_journal_end(th, sb, nblocks, 0); + } + } + +@@ -3384,15 +3383,15 @@ int journal_end(struct reiserfs_transact + ** + ** returns 1 if it cleaned and relsed the buffer. 0 otherwise + */ +-static int remove_from_transaction(struct super_block *p_s_sb, ++static int remove_from_transaction(struct super_block *sb, + b_blocknr_t blocknr, int already_cleaned) + { + struct buffer_head *bh; + struct reiserfs_journal_cnode *cn; +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + int ret = 0; + +- cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); ++ cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr); + if (!cn || !cn->bh) { + return ret; + } +@@ -3410,7 +3409,7 @@ static int remove_from_transaction(struc + journal->j_last = cn->prev; + } + if (bh) +- remove_journal_hash(p_s_sb, journal->j_hash_table, NULL, ++ remove_journal_hash(sb, journal->j_hash_table, NULL, + bh->b_blocknr, 0); + clear_buffer_journaled(bh); /* don't log this one */ + +@@ -3420,14 +3419,14 @@ static int remove_from_transaction(struc + clear_buffer_journal_test(bh); + put_bh(bh); + if (atomic_read(&(bh->b_count)) < 0) { +- reiserfs_warning(p_s_sb, "journal-1752", ++ reiserfs_warning(sb, "journal-1752", + "b_count < 0"); + } + ret = 1; + } + journal->j_len--; + journal->j_len_alloc--; +- free_cnode(p_s_sb, cn); ++ free_cnode(sb, cn); + return ret; + } + +@@ -3478,19 +3477,19 @@ static int can_dirty(struct reiserfs_jou + ** will wait until the current transaction is done/committed before returning + */ + int journal_end_sync(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, unsigned long nblocks) ++ struct super_block *sb, unsigned long nblocks) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + + BUG_ON(!th->t_trans_id); + /* you can sync while nested, very, very bad */ + BUG_ON(th->t_refcount > 1); + if 
(journal->j_len == 0) { +- reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), ++ reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), + 1); +- journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); ++ journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb)); + } +- return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT); ++ return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT); + } + + /* +@@ -3500,7 +3499,7 @@ static void flush_async_commits(struct w + { + struct reiserfs_journal *journal = + container_of(work, struct reiserfs_journal, j_work.work); +- struct super_block *p_s_sb = journal->j_work_sb; ++ struct super_block *sb = journal->j_work_sb; + struct reiserfs_journal_list *jl; + struct list_head *entry; + +@@ -3509,7 +3508,7 @@ static void flush_async_commits(struct w + /* last entry is the youngest, commit it and you get everything */ + entry = journal->j_journal_list.prev; + jl = JOURNAL_LIST_ENTRY(entry); +- flush_commit_list(p_s_sb, jl, 1); ++ flush_commit_list(sb, jl, 1); + } + unlock_kernel(); + } +@@ -3518,11 +3517,11 @@ static void flush_async_commits(struct w + ** flushes any old transactions to disk + ** ends the current transaction if it is too old + */ +-int reiserfs_flush_old_commits(struct super_block *p_s_sb) ++int reiserfs_flush_old_commits(struct super_block *sb) + { + time_t now; + struct reiserfs_transaction_handle th; +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + + now = get_seconds(); + /* safety check so we don't flush while we are replaying the log during +@@ -3539,20 +3538,20 @@ int reiserfs_flush_old_commits(struct su + journal->j_trans_start_time > 0 && + journal->j_len > 0 && + (now - journal->j_trans_start_time) > journal->j_max_trans_age) { +- if (!journal_join(&th, p_s_sb, 1)) { +- reiserfs_prepare_for_journal(p_s_sb, +- SB_BUFFER_WITH_SB(p_s_sb), ++ if (!journal_join(&th, sb, 1)) { ++ reiserfs_prepare_for_journal(sb, ++ 
SB_BUFFER_WITH_SB(sb), + 1); +- journal_mark_dirty(&th, p_s_sb, +- SB_BUFFER_WITH_SB(p_s_sb)); ++ journal_mark_dirty(&th, sb, ++ SB_BUFFER_WITH_SB(sb)); + + /* we're only being called from kreiserfsd, it makes no sense to do + ** an async commit so that kreiserfsd can do it later + */ +- do_journal_end(&th, p_s_sb, 1, COMMIT_NOW | WAIT); ++ do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); + } + } +- return p_s_sb->s_dirt; ++ return sb->s_dirt; + } + + /* +@@ -3567,7 +3566,7 @@ int reiserfs_flush_old_commits(struct su + ** Note, we can't allow the journal_end to proceed while there are still writers in the log. + */ + static int check_journal_end(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, unsigned long nblocks, ++ struct super_block *sb, unsigned long nblocks, + int flags) + { + +@@ -3576,7 +3575,7 @@ static int check_journal_end(struct reis + int commit_now = flags & COMMIT_NOW; + int wait_on_commit = flags & WAIT; + struct reiserfs_journal_list *jl; +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + + BUG_ON(!th->t_trans_id); + +@@ -3615,31 +3614,31 @@ static int check_journal_end(struct reis + if (flush) { + journal->j_next_full_flush = 1; + } +- unlock_journal(p_s_sb); ++ unlock_journal(sb); + + /* sleep while the current transaction is still j_jlocked */ + while (journal->j_trans_id == trans_id) { + if (atomic_read(&journal->j_jlock)) { +- queue_log_writer(p_s_sb); ++ queue_log_writer(sb); + } else { +- lock_journal(p_s_sb); ++ lock_journal(sb); + if (journal->j_trans_id == trans_id) { + atomic_set(&(journal->j_jlock), + 1); + } +- unlock_journal(p_s_sb); ++ unlock_journal(sb); + } + } + BUG_ON(journal->j_trans_id == trans_id); + + if (commit_now +- && journal_list_still_alive(p_s_sb, trans_id) ++ && journal_list_still_alive(sb, trans_id) + && wait_on_commit) { +- flush_commit_list(p_s_sb, jl, 1); ++ flush_commit_list(sb, jl, 1); + } + return 0; + } +- 
unlock_journal(p_s_sb); ++ unlock_journal(sb); + return 0; + } + +@@ -3656,12 +3655,12 @@ static int check_journal_end(struct reis + && journal->j_len_alloc < journal->j_max_batch + && journal->j_cnode_free > (journal->j_trans_max * 3)) { + journal->j_bcount++; +- unlock_journal(p_s_sb); ++ unlock_journal(sb); + return 0; + } + +- if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { +- reiserfs_panic(p_s_sb, "journal-003", ++ if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) { ++ reiserfs_panic(sb, "journal-003", + "j_start (%ld) is too high", + journal->j_start); + } +@@ -3683,16 +3682,16 @@ static int check_journal_end(struct reis + ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. + */ + int journal_mark_freed(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, b_blocknr_t blocknr) ++ struct super_block *sb, b_blocknr_t blocknr) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + struct reiserfs_journal_cnode *cn = NULL; + struct buffer_head *bh = NULL; + struct reiserfs_list_bitmap *jb = NULL; + int cleaned = 0; + BUG_ON(!th->t_trans_id); + +- cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); ++ cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr); + if (cn && cn->bh) { + bh = cn->bh; + get_bh(bh); +@@ -3702,15 +3701,15 @@ int journal_mark_freed(struct reiserfs_t + clear_buffer_journal_new(bh); + clear_prepared_bits(bh); + reiserfs_clean_and_file_buffer(bh); +- cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned); ++ cleaned = remove_from_transaction(sb, blocknr, cleaned); + } else { + /* set the bit for this block in the journal bitmap for this transaction */ + jb = journal->j_current_jl->j_list_bitmap; + if (!jb) { +- reiserfs_panic(p_s_sb, "journal-1702", ++ reiserfs_panic(sb, "journal-1702", + "journal_list_bitmap is NULL"); + } +- set_bit_in_list_bitmap(p_s_sb, blocknr, jb); ++ 
set_bit_in_list_bitmap(sb, blocknr, jb); + + /* Note, the entire while loop is not allowed to schedule. */ + +@@ -3718,13 +3717,13 @@ int journal_mark_freed(struct reiserfs_t + clear_prepared_bits(bh); + reiserfs_clean_and_file_buffer(bh); + } +- cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned); ++ cleaned = remove_from_transaction(sb, blocknr, cleaned); + + /* find all older transactions with this block, make sure they don't try to write it out */ +- cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, ++ cn = get_journal_hash_dev(sb, journal->j_list_hash_table, + blocknr); + while (cn) { +- if (p_s_sb == cn->sb && blocknr == cn->blocknr) { ++ if (sb == cn->sb && blocknr == cn->blocknr) { + set_bit(BLOCK_FREED, &cn->state); + if (cn->bh) { + if (!cleaned) { +@@ -3740,7 +3739,7 @@ int journal_mark_freed(struct reiserfs_t + put_bh(cn->bh); + if (atomic_read + (&(cn->bh->b_count)) < 0) { +- reiserfs_warning(p_s_sb, ++ reiserfs_warning(sb, + "journal-2138", + "cn->bh->b_count < 0"); + } +@@ -3847,18 +3846,18 @@ int reiserfs_commit_for_inode(struct ino + return __commit_trans_jl(inode, id, jl); + } + +-void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, ++void reiserfs_restore_prepared_buffer(struct super_block *sb, + struct buffer_head *bh) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); +- PROC_INFO_INC(p_s_sb, journal.restore_prepared); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); ++ PROC_INFO_INC(sb, journal.restore_prepared); + if (!bh) { + return; + } + if (test_clear_buffer_journal_restore_dirty(bh) && + buffer_journal_dirty(bh)) { + struct reiserfs_journal_cnode *cn; +- cn = get_journal_hash_dev(p_s_sb, ++ cn = get_journal_hash_dev(sb, + journal->j_list_hash_table, + bh->b_blocknr); + if (cn && can_dirty(cn)) { +@@ -3877,10 +3876,10 @@ extern struct tree_balance *cur_tb; + ** wait on it. 
+ ** + */ +-int reiserfs_prepare_for_journal(struct super_block *p_s_sb, ++int reiserfs_prepare_for_journal(struct super_block *sb, + struct buffer_head *bh, int wait) + { +- PROC_INFO_INC(p_s_sb, journal.prepare); ++ PROC_INFO_INC(sb, journal.prepare); + + if (!trylock_buffer(bh)) { + if (!wait) +@@ -3928,10 +3927,10 @@ static void flush_old_journal_lists(stru + ** journal lists, etc just won't happen. + */ + static int do_journal_end(struct reiserfs_transaction_handle *th, +- struct super_block *p_s_sb, unsigned long nblocks, ++ struct super_block *sb, unsigned long nblocks, + int flags) + { +- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); ++ struct reiserfs_journal *journal = SB_JOURNAL(sb); + struct reiserfs_journal_cnode *cn, *next, *jl_cn; + struct reiserfs_journal_cnode *last_cn = NULL; + struct reiserfs_journal_desc *desc; +@@ -3961,14 +3960,14 @@ static int do_journal_end(struct reiserf + + put_fs_excl(); + current->journal_info = th->t_handle_save; +- reiserfs_check_lock_depth(p_s_sb, "journal end"); ++ reiserfs_check_lock_depth(sb, "journal end"); + if (journal->j_len == 0) { +- reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), ++ reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), + 1); +- journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); ++ journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb)); + } + +- lock_journal(p_s_sb); ++ lock_journal(sb); + if (journal->j_next_full_flush) { + flags |= FLUSH_ALL; + flush = 1; +@@ -3981,10 +3980,10 @@ static int do_journal_end(struct reiserf + /* check_journal_end locks the journal, and unlocks if it does not return 1 + ** it tells us if we should continue with the journal_end, or just return + */ +- if (!check_journal_end(th, p_s_sb, nblocks, flags)) { +- p_s_sb->s_dirt = 1; +- wake_queued_writers(p_s_sb); +- reiserfs_async_progress_wait(p_s_sb); ++ if (!check_journal_end(th, sb, nblocks, flags)) { ++ sb->s_dirt = 1; ++ wake_queued_writers(sb); ++ reiserfs_async_progress_wait(sb); 
+ goto out; + } + +@@ -4013,8 +4012,8 @@ static int do_journal_end(struct reiserf + + /* setup description block */ + d_bh = +- journal_getblk(p_s_sb, +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ++ journal_getblk(sb, ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb) + + journal->j_start); + set_buffer_uptodate(d_bh); + desc = (struct reiserfs_journal_desc *)(d_bh)->b_data; +@@ -4023,9 +4022,9 @@ static int do_journal_end(struct reiserf + set_desc_trans_id(desc, journal->j_trans_id); + + /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */ +- c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ++ c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + + ((journal->j_start + journal->j_len + +- 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); ++ 1) % SB_ONDISK_JOURNAL_SIZE(sb))); + commit = (struct reiserfs_journal_commit *)c_bh->b_data; + memset(c_bh->b_data, 0, c_bh->b_size); + set_commit_trans_id(commit, journal->j_trans_id); +@@ -4058,12 +4057,12 @@ static int do_journal_end(struct reiserf + ** for each real block, add it to the journal list hash, + ** copy into real block index array in the commit or desc block + */ +- trans_half = journal_trans_half(p_s_sb->s_blocksize); ++ trans_half = journal_trans_half(sb->s_blocksize); + for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { + if (buffer_journaled(cn->bh)) { +- jl_cn = get_cnode(p_s_sb); ++ jl_cn = get_cnode(sb); + if (!jl_cn) { +- reiserfs_panic(p_s_sb, "journal-1676", ++ reiserfs_panic(sb, "journal-1676", + "get_cnode returned NULL"); + } + if (i == 0) { +@@ -4079,15 +4078,15 @@ static int do_journal_end(struct reiserf + of journal or reserved area */ + + if (is_block_in_log_or_reserved_area +- (p_s_sb, cn->bh->b_blocknr)) { +- reiserfs_panic(p_s_sb, "journal-2332", ++ (sb, cn->bh->b_blocknr)) { ++ reiserfs_panic(sb, "journal-2332", + "Trying to log block %lu, " + "which is a log block", + cn->bh->b_blocknr); + } + jl_cn->blocknr = cn->bh->b_blocknr; + 
jl_cn->state = 0; +- jl_cn->sb = p_s_sb; ++ jl_cn->sb = sb; + jl_cn->bh = cn->bh; + jl_cn->jlist = jl; + insert_journal_hash(journal->j_list_hash_table, jl_cn); +@@ -4128,11 +4127,11 @@ static int do_journal_end(struct reiserf + char *addr; + struct page *page; + tmp_bh = +- journal_getblk(p_s_sb, +- SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ++ journal_getblk(sb, ++ SB_ONDISK_JOURNAL_1st_BLOCK(sb) + + ((cur_write_start + + jindex) % +- SB_ONDISK_JOURNAL_SIZE(p_s_sb))); ++ SB_ONDISK_JOURNAL_SIZE(sb))); + set_buffer_uptodate(tmp_bh); + page = cn->bh->b_page; + addr = kmap(page); +@@ -4146,13 +4145,13 @@ static int do_journal_end(struct reiserf + clear_buffer_journaled(cn->bh); + } else { + /* JDirty cleared sometime during transaction. don't log this one */ +- reiserfs_warning(p_s_sb, "journal-2048", ++ reiserfs_warning(sb, "journal-2048", + "BAD, buffer in journal hash, " + "but not JDirty!"); + brelse(cn->bh); + } + next = cn->next; +- free_cnode(p_s_sb, cn); ++ free_cnode(sb, cn); + cn = next; + cond_resched(); + } +@@ -4162,7 +4161,7 @@ static int do_journal_end(struct reiserf + ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. 
+ */ + +- journal->j_current_jl = alloc_journal_list(p_s_sb); ++ journal->j_current_jl = alloc_journal_list(sb); + + /* now it is safe to insert this transaction on the main list */ + list_add_tail(&jl->j_list, &journal->j_journal_list); +@@ -4173,7 +4172,7 @@ static int do_journal_end(struct reiserf + old_start = journal->j_start; + journal->j_start = + (journal->j_start + journal->j_len + +- 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb); ++ 2) % SB_ONDISK_JOURNAL_SIZE(sb); + atomic_set(&(journal->j_wcount), 0); + journal->j_bcount = 0; + journal->j_last = NULL; +@@ -4188,7 +4187,7 @@ static int do_journal_end(struct reiserf + journal->j_len_alloc = 0; + journal->j_next_full_flush = 0; + journal->j_next_async_flush = 0; +- init_journal_hash(p_s_sb); ++ init_journal_hash(sb); + + // make sure reiserfs_add_jh sees the new current_jl before we + // write out the tails +@@ -4217,8 +4216,8 @@ static int do_journal_end(struct reiserf + ** queue don't wait for this proc to flush journal lists and such. 
+ */ + if (flush) { +- flush_commit_list(p_s_sb, jl, 1); +- flush_journal_list(p_s_sb, jl, 1); ++ flush_commit_list(sb, jl, 1); ++ flush_journal_list(sb, jl, 1); + } else if (!(jl->j_state & LIST_COMMIT_PENDING)) + queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); + +@@ -4232,11 +4231,11 @@ static int do_journal_end(struct reiserf + if (journal->j_start <= temp_jl->j_start) { + if ((journal->j_start + journal->j_trans_max + 1) >= + temp_jl->j_start) { +- flush_used_journal_lists(p_s_sb, temp_jl); ++ flush_used_journal_lists(sb, temp_jl); + goto first_jl; + } else if ((journal->j_start + + journal->j_trans_max + 1) < +- SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { ++ SB_ONDISK_JOURNAL_SIZE(sb)) { + /* if we don't cross into the next transaction and we don't + * wrap, there is no way we can overlap any later transactions + * break now +@@ -4245,11 +4244,11 @@ static int do_journal_end(struct reiserf + } + } else if ((journal->j_start + + journal->j_trans_max + 1) > +- SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { ++ SB_ONDISK_JOURNAL_SIZE(sb)) { + if (((journal->j_start + journal->j_trans_max + 1) % +- SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= ++ SB_ONDISK_JOURNAL_SIZE(sb)) >= + temp_jl->j_start) { +- flush_used_journal_lists(p_s_sb, temp_jl); ++ flush_used_journal_lists(sb, temp_jl); + goto first_jl; + } else { + /* we don't overlap anything from out start to the end of the +@@ -4260,34 +4259,34 @@ static int do_journal_end(struct reiserf + } + } + } +- flush_old_journal_lists(p_s_sb); ++ flush_old_journal_lists(sb); + + journal->j_current_jl->j_list_bitmap = +- get_list_bitmap(p_s_sb, journal->j_current_jl); ++ get_list_bitmap(sb, journal->j_current_jl); + + if (!(journal->j_current_jl->j_list_bitmap)) { +- reiserfs_panic(p_s_sb, "journal-1996", ++ reiserfs_panic(sb, "journal-1996", + "could not get a list bitmap"); + } + + atomic_set(&(journal->j_jlock), 0); +- unlock_journal(p_s_sb); ++ unlock_journal(sb); + /* wake up any body waiting to join. 
*/ + clear_bit(J_WRITERS_QUEUED, &journal->j_state); + wake_up(&(journal->j_join_wait)); + + if (!flush && wait_on_commit && +- journal_list_still_alive(p_s_sb, commit_trans_id)) { +- flush_commit_list(p_s_sb, jl, 1); ++ journal_list_still_alive(sb, commit_trans_id)) { ++ flush_commit_list(sb, jl, 1); + } + out: +- reiserfs_check_lock_depth(p_s_sb, "journal end2"); ++ reiserfs_check_lock_depth(sb, "journal end2"); + + memset(th, 0, sizeof(*th)); + /* Re-set th->t_super, so we can properly keep track of how many + * persistent transactions there are. We need to do this so if this + * call is part of a failed restart_transaction, we can free it later */ +- th->t_super = p_s_sb; ++ th->t_super = sb; + + return journal->j_errno; + } +--- a/fs/reiserfs/stree.c ++++ b/fs/reiserfs/stree.c +@@ -245,7 +245,7 @@ static const struct reiserfs_key MAX_KEY + static inline const struct reiserfs_key *get_lkey(const struct treepath + *p_s_chk_path, + const struct super_block +- *p_s_sb) ++ *sb) + { + int n_position, n_path_offset = p_s_chk_path->path_length; + struct buffer_head *p_s_parent; +@@ -282,14 +282,14 @@ static inline const struct reiserfs_key + } + /* Return MIN_KEY if we are in the root of the buffer tree. */ + if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)-> +- b_blocknr == SB_ROOT_BLOCK(p_s_sb)) ++ b_blocknr == SB_ROOT_BLOCK(sb)) + return &MIN_KEY; + return &MAX_KEY; + } + + /* Get delimiting key of the buffer at the path and its right neighbor. */ + inline const struct reiserfs_key *get_rkey(const struct treepath *p_s_chk_path, +- const struct super_block *p_s_sb) ++ const struct super_block *sb) + { + int n_position, n_path_offset = p_s_chk_path->path_length; + struct buffer_head *p_s_parent; +@@ -325,7 +325,7 @@ inline const struct reiserfs_key *get_rk + } + /* Return MAX_KEY if we are in the root of the buffer tree. 
*/ + if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)-> +- b_blocknr == SB_ROOT_BLOCK(p_s_sb)) ++ b_blocknr == SB_ROOT_BLOCK(sb)) + return &MAX_KEY; + return &MIN_KEY; + } +@@ -337,7 +337,7 @@ inline const struct reiserfs_key *get_rk + this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ + static inline int key_in_buffer(struct treepath *p_s_chk_path, /* Path which should be checked. */ + const struct cpu_key *p_s_key, /* Key which should be checked. */ +- struct super_block *p_s_sb /* Super block pointer. */ ++ struct super_block *sb /* Super block pointer. */ + ) + { + +@@ -348,11 +348,11 @@ static inline int key_in_buffer(struct t + RFALSE(!PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev, + "PAP-5060: device must not be NODEV"); + +- if (comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1) ++ if (comp_keys(get_lkey(p_s_chk_path, sb), p_s_key) == 1) + /* left delimiting key is bigger, that the key we look for */ + return 0; +- // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 ) +- if (comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1) ++ // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, sb)) != -1 ) ++ if (comp_keys(get_rkey(p_s_chk_path, sb), p_s_key) != 1) + /* p_s_key must be less than right delimitiing key */ + return 0; + return 1; +@@ -546,7 +546,7 @@ static void search_by_key_reada(struct s + /************************************************************************** + * Algorithm SearchByKey * + * look for item in the Disk S+Tree by its key * +- * Input: p_s_sb - super block * ++ * Input: sb - super block * + * p_s_key - pointer to the key to search * + * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR * + * p_s_search_path - path from the root to the needed leaf * +@@ -566,7 +566,7 @@ static void search_by_key_reada(struct s + correctness of the top of the path but need not be checked for the + correctness of the bottom of the path */ + /* The function is NOT SCHEDULE-SAFE! 
*/ +-int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /* Key to search. */ ++int search_by_key(struct super_block *sb, const struct cpu_key *p_s_key, /* Key to search. */ + struct treepath *p_s_search_path,/* This structure was + allocated and initialized + by the calling +@@ -592,7 +592,7 @@ int search_by_key(struct super_block *p_ + int n_repeat_counter = 0; + #endif + +- PROC_INFO_INC(p_s_sb, search_by_key); ++ PROC_INFO_INC(sb, search_by_key); + + /* As we add each node to a path we increase its count. This means that + we must be careful to release all nodes in a path before we either +@@ -605,13 +605,13 @@ int search_by_key(struct super_block *p_ + /* With each iteration of this loop we search through the items in the + current node, and calculate the next current node(next path element) + for the next iteration of this loop.. */ +- n_block_number = SB_ROOT_BLOCK(p_s_sb); ++ n_block_number = SB_ROOT_BLOCK(sb); + expected_level = -1; + while (1) { + + #ifdef CONFIG_REISERFS_CHECK + if (!(++n_repeat_counter % 50000)) +- reiserfs_warning(p_s_sb, "PAP-5100", ++ reiserfs_warning(sb, "PAP-5100", + "%s: there were %d iterations of " + "while loop looking for key %K", + current->comm, n_repeat_counter, +@@ -622,14 +622,14 @@ int search_by_key(struct super_block *p_ + p_s_last_element = + PATH_OFFSET_PELEMENT(p_s_search_path, + ++p_s_search_path->path_length); +- fs_gen = get_generation(p_s_sb); ++ fs_gen = get_generation(sb); + + /* Read the next tree node, and set the last element in the path to + have a pointer to it. 
*/ + if ((p_s_bh = p_s_last_element->pe_buffer = +- sb_getblk(p_s_sb, n_block_number))) { ++ sb_getblk(sb, n_block_number))) { + if (!buffer_uptodate(p_s_bh) && reada_count > 1) { +- search_by_key_reada(p_s_sb, reada_bh, ++ search_by_key_reada(sb, reada_bh, + reada_blocks, reada_count); + } + ll_rw_block(READ, 1, &p_s_bh); +@@ -644,25 +644,25 @@ int search_by_key(struct super_block *p_ + } + reada_count = 0; + if (expected_level == -1) +- expected_level = SB_TREE_HEIGHT(p_s_sb); ++ expected_level = SB_TREE_HEIGHT(sb); + expected_level--; + + /* It is possible that schedule occurred. We must check whether the key + to search is still in the tree rooted from the current buffer. If + not then repeat search from the root. */ +- if (fs_changed(fs_gen, p_s_sb) && ++ if (fs_changed(fs_gen, sb) && + (!B_IS_IN_TREE(p_s_bh) || + B_LEVEL(p_s_bh) != expected_level || +- !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) { +- PROC_INFO_INC(p_s_sb, search_by_key_fs_changed); +- PROC_INFO_INC(p_s_sb, search_by_key_restarted); +- PROC_INFO_INC(p_s_sb, ++ !key_in_buffer(p_s_search_path, p_s_key, sb))) { ++ PROC_INFO_INC(sb, search_by_key_fs_changed); ++ PROC_INFO_INC(sb, search_by_key_restarted); ++ PROC_INFO_INC(sb, + sbk_restarted[expected_level - 1]); + pathrelse(p_s_search_path); + + /* Get the root block number so that we can repeat the search + starting from the root. */ +- n_block_number = SB_ROOT_BLOCK(p_s_sb); ++ n_block_number = SB_ROOT_BLOCK(sb); + expected_level = -1; + right_neighbor_of_leaf_node = 0; + +@@ -674,12 +674,12 @@ int search_by_key(struct super_block *p_ + equal to the MAX_KEY. Latter case is only possible in + "finish_unfinished()" processing during mount. 
*/ + RFALSE(comp_keys(&MAX_KEY, p_s_key) && +- !key_in_buffer(p_s_search_path, p_s_key, p_s_sb), ++ !key_in_buffer(p_s_search_path, p_s_key, sb), + "PAP-5130: key is not in the buffer"); + #ifdef CONFIG_REISERFS_CHECK + if (cur_tb) { + print_cur_tb("5140"); +- reiserfs_panic(p_s_sb, "PAP-5140", ++ reiserfs_panic(sb, "PAP-5140", + "schedule occurred in do_balance!"); + } + #endif +@@ -687,7 +687,7 @@ int search_by_key(struct super_block *p_ + // make sure, that the node contents look like a node of + // certain level + if (!is_tree_node(p_s_bh, expected_level)) { +- reiserfs_error(p_s_sb, "vs-5150", ++ reiserfs_error(sb, "vs-5150", + "invalid format found in block %ld. " + "Fsck?", p_s_bh->b_blocknr); + pathrelse(p_s_search_path); +@@ -697,7 +697,7 @@ int search_by_key(struct super_block *p_ + /* ok, we have acquired next formatted node in the tree */ + n_node_level = B_LEVEL(p_s_bh); + +- PROC_INFO_BH_STAT(p_s_sb, p_s_bh, n_node_level - 1); ++ PROC_INFO_BH_STAT(sb, p_s_bh, n_node_level - 1); + + RFALSE(n_node_level < n_stop_level, + "vs-5152: tree level (%d) is less than stop level (%d)", +@@ -776,7 +776,7 @@ int search_by_key(struct super_block *p_ + units of directory entries. */ + + /* The function is NOT SCHEDULE-SAFE! */ +-int search_for_position_by_key(struct super_block *p_s_sb, /* Pointer to the super block. */ ++int search_for_position_by_key(struct super_block *sb, /* Pointer to the super block. */ + const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */ + struct treepath *p_s_search_path /* Filled up by this function. */ + ) +@@ -789,13 +789,13 @@ int search_for_position_by_key(struct su + + /* If searching for directory entry. */ + if (is_direntry_cpu_key(p_cpu_key)) +- return search_by_entry_key(p_s_sb, p_cpu_key, p_s_search_path, ++ return search_by_entry_key(sb, p_cpu_key, p_s_search_path, + &de); + + /* If not searching for directory entry. */ + + /* If item is found. 
*/ +- retval = search_item(p_s_sb, p_cpu_key, p_s_search_path); ++ retval = search_item(sb, p_cpu_key, p_s_search_path); + if (retval == IO_ERROR) + return retval; + if (retval == ITEM_FOUND) { +@@ -817,7 +817,7 @@ int search_for_position_by_key(struct su + p_le_ih = + B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), + --PATH_LAST_POSITION(p_s_search_path)); +- n_blk_size = p_s_sb->s_blocksize; ++ n_blk_size = sb->s_blocksize; + + if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) { + return FILE_NOT_FOUND; +@@ -957,7 +957,7 @@ static char prepare_for_delete_or_cut(st + int *p_n_cut_size, unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */ + ) + { +- struct super_block *p_s_sb = inode->i_sb; ++ struct super_block *sb = inode->i_sb; + struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_path); + struct buffer_head *p_s_bh = PATH_PLAST_BUFFER(p_s_path); + +@@ -986,7 +986,7 @@ static char prepare_for_delete_or_cut(st + + /* Case of an indirect item. */ + { +- int blk_size = p_s_sb->s_blocksize; ++ int blk_size = sb->s_blocksize; + struct item_head s_ih; + int need_re_search; + int delete = 0; +@@ -1023,9 +1023,9 @@ static char prepare_for_delete_or_cut(st + block = get_block_num(unfm, 0); + + if (block != 0) { +- reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1); ++ reiserfs_prepare_for_journal(sb, p_s_bh, 1); + put_block_num(unfm, 0, 0); +- journal_mark_dirty (th, p_s_sb, p_s_bh); ++ journal_mark_dirty (th, sb, p_s_bh); + reiserfs_free_block(th, inode, block, 1); + } + +@@ -1049,9 +1049,9 @@ static char prepare_for_delete_or_cut(st + /* a trick. If the buffer has been logged, this will do nothing. 
If + ** we've broken the loop without logging it, it will restore the + ** buffer */ +- reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh); ++ reiserfs_restore_prepared_buffer(sb, p_s_bh); + } while (need_re_search && +- search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND); ++ search_for_position_by_key(sb, p_s_item_key, p_s_path) == POSITION_FOUND); + pos_in_item(p_s_path) = pos * UNFM_P_SIZE; + + if (*p_n_cut_size == 0) { +@@ -1090,7 +1090,7 @@ static int calc_deleted_bytes_number(str + + static void init_tb_struct(struct reiserfs_transaction_handle *th, + struct tree_balance *p_s_tb, +- struct super_block *p_s_sb, ++ struct super_block *sb, + struct treepath *p_s_path, int n_size) + { + +@@ -1098,7 +1098,7 @@ static void init_tb_struct(struct reiser + + memset(p_s_tb, '\0', sizeof(struct tree_balance)); + p_s_tb->transaction_handle = th; +- p_s_tb->tb_sb = p_s_sb; ++ p_s_tb->tb_sb = sb; + p_s_tb->tb_path = p_s_path; + PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; + PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; +@@ -1147,7 +1147,7 @@ int reiserfs_delete_item(struct reiserfs + struct inode *p_s_inode, /* inode is here just to update i_blocks and quotas */ + struct buffer_head *p_s_un_bh) + { /* NULL or unformatted node pointer. 
*/ +- struct super_block *p_s_sb = p_s_inode->i_sb; ++ struct super_block *sb = p_s_inode->i_sb; + struct tree_balance s_del_balance; + struct item_head s_ih; + struct item_head *q_ih; +@@ -1161,7 +1161,7 @@ int reiserfs_delete_item(struct reiserfs + + BUG_ON(!th->t_trans_id); + +- init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, ++ init_tb_struct(th, &s_del_balance, sb, p_s_path, + 0 /*size is unknown */ ); + + while (1) { +@@ -1185,15 +1185,15 @@ int reiserfs_delete_item(struct reiserfs + if (n_ret_value != REPEAT_SEARCH) + break; + +- PROC_INFO_INC(p_s_sb, delete_item_restarted); ++ PROC_INFO_INC(sb, delete_item_restarted); + + // file system changed, repeat search + n_ret_value = +- search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); ++ search_for_position_by_key(sb, p_s_item_key, p_s_path); + if (n_ret_value == IO_ERROR) + break; + if (n_ret_value == FILE_NOT_FOUND) { +- reiserfs_warning(p_s_sb, "vs-5340", ++ reiserfs_warning(sb, "vs-5340", + "no items of the file %K found", + p_s_item_key); + break; +@@ -1216,8 +1216,8 @@ int reiserfs_delete_item(struct reiserfs + ** the unfm node once + */ + if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(q_ih)) { +- if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) { +- quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE; ++ if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) { ++ quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; + } else { + quota_cut_bytes = 0; + } +@@ -1258,7 +1258,7 @@ int reiserfs_delete_item(struct reiserfs + do_balance(&s_del_balance, NULL, NULL, M_DELETE); + + #ifdef REISERQUOTA_DEBUG +- reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, ++ reiserfs_debug(sb, REISERFS_DEBUG_CODE, + "reiserquota delete_item(): freeing %u, id=%u type=%c", + quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); + #endif +@@ -1430,8 +1430,8 @@ static int maybe_indirect_to_direct(stru + const struct cpu_key *p_s_item_key, + loff_t n_new_file_size, char *p_c_mode) + { +- struct super_block *p_s_sb = 
p_s_inode->i_sb; +- int n_block_size = p_s_sb->s_blocksize; ++ struct super_block *sb = p_s_inode->i_sb; ++ int n_block_size = sb->s_blocksize; + int cut_bytes; + BUG_ON(!th->t_trans_id); + BUG_ON(n_new_file_size != p_s_inode->i_size); +@@ -1509,7 +1509,7 @@ int reiserfs_cut_from_item(struct reiser + struct inode *p_s_inode, + struct page *page, loff_t n_new_file_size) + { +- struct super_block *p_s_sb = p_s_inode->i_sb; ++ struct super_block *sb = p_s_inode->i_sb; + /* Every function which is going to call do_balance must first + create a tree_balance structure. Then it must fill up this + structure by using the init_tb_struct and fix_nodes functions. +@@ -1560,7 +1560,7 @@ int reiserfs_cut_from_item(struct reiser + /* removing of last unformatted node will change value we + have to return to truncate. Save it */ + retval2 = n_ret_value; +- /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1)); */ ++ /*retval2 = sb->s_blocksize - (n_new_file_size & (sb->s_blocksize - 1)); */ + + /* So, we have performed the first part of the conversion: + inserting the new direct item. 
Now we are removing the +@@ -1569,16 +1569,16 @@ int reiserfs_cut_from_item(struct reiser + set_cpu_key_k_type(p_s_item_key, TYPE_INDIRECT); + p_s_item_key->key_length = 4; + n_new_file_size -= +- (n_new_file_size & (p_s_sb->s_blocksize - 1)); ++ (n_new_file_size & (sb->s_blocksize - 1)); + tail_pos = n_new_file_size; + set_cpu_key_k_offset(p_s_item_key, n_new_file_size + 1); + if (search_for_position_by_key +- (p_s_sb, p_s_item_key, ++ (sb, p_s_item_key, + p_s_path) == POSITION_NOT_FOUND) { + print_block(PATH_PLAST_BUFFER(p_s_path), 3, + PATH_LAST_POSITION(p_s_path) - 1, + PATH_LAST_POSITION(p_s_path) + 1); +- reiserfs_panic(p_s_sb, "PAP-5580", "item to " ++ reiserfs_panic(sb, "PAP-5580", "item to " + "convert does not exist (%K)", + p_s_item_key); + } +@@ -1595,14 +1595,14 @@ int reiserfs_cut_from_item(struct reiser + if (n_ret_value != REPEAT_SEARCH) + break; + +- PROC_INFO_INC(p_s_sb, cut_from_item_restarted); ++ PROC_INFO_INC(sb, cut_from_item_restarted); + + n_ret_value = +- search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); ++ search_for_position_by_key(sb, p_s_item_key, p_s_path); + if (n_ret_value == POSITION_FOUND) + continue; + +- reiserfs_warning(p_s_sb, "PAP-5610", "item %K not found", ++ reiserfs_warning(sb, "PAP-5610", "item %K not found", + p_s_item_key); + unfix_nodes(&s_cut_balance); + return (n_ret_value == IO_ERROR) ? 
-EIO : -ENOENT; +@@ -1616,7 +1616,7 @@ int reiserfs_cut_from_item(struct reiser + indirect_to_direct_roll_back(th, p_s_inode, p_s_path); + } + if (n_ret_value == NO_DISK_SPACE) +- reiserfs_warning(p_s_sb, "reiserfs-5092", ++ reiserfs_warning(sb, "reiserfs-5092", + "NO_DISK_SPACE"); + unfix_nodes(&s_cut_balance); + return -EIO; +@@ -1642,11 +1642,11 @@ int reiserfs_cut_from_item(struct reiser + p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path); + if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) { + if (c_mode == M_DELETE && +- (le_ih_k_offset(p_le_ih) & (p_s_sb->s_blocksize - 1)) == ++ (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == + 1) { + // FIXME: this is to keep 3.5 happy + REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX; +- quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE; ++ quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; + } else { + quota_cut_bytes = 0; + } +@@ -1659,18 +1659,18 @@ int reiserfs_cut_from_item(struct reiser + sure, that we exactly remove last unformatted node pointer + of the item */ + if (!is_indirect_le_ih(le_ih)) +- reiserfs_panic(p_s_sb, "vs-5652", ++ reiserfs_panic(sb, "vs-5652", + "item must be indirect %h", le_ih); + + if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) +- reiserfs_panic(p_s_sb, "vs-5653", "completing " ++ reiserfs_panic(sb, "vs-5653", "completing " + "indirect2direct conversion indirect " + "item %h being deleted must be of " + "4 byte long", le_ih); + + if (c_mode == M_CUT + && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { +- reiserfs_panic(p_s_sb, "vs-5654", "can not complete " ++ reiserfs_panic(sb, "vs-5654", "can not complete " + "indirect2direct conversion of %h " + "(CUT, insert_size==%d)", + le_ih, s_cut_balance.insert_size[0]); +--- a/fs/reiserfs/tail_conversion.c ++++ b/fs/reiserfs/tail_conversion.c +@@ -175,9 +175,9 @@ int indirect2direct(struct reiserfs_tran + loff_t n_new_file_size, /* New file size. 
*/ + char *p_c_mode) + { +- struct super_block *p_s_sb = p_s_inode->i_sb; ++ struct super_block *sb = p_s_inode->i_sb; + struct item_head s_ih; +- unsigned long n_block_size = p_s_sb->s_blocksize; ++ unsigned long n_block_size = sb->s_blocksize; + char *tail; + int tail_len, round_tail_len; + loff_t pos, pos1; /* position of first byte of the tail */ +@@ -185,7 +185,7 @@ int indirect2direct(struct reiserfs_tran + + BUG_ON(!th->t_trans_id); + +- REISERFS_SB(p_s_sb)->s_indirect2direct++; ++ REISERFS_SB(sb)->s_indirect2direct++; + + *p_c_mode = M_SKIP_BALANCING; + +@@ -200,7 +200,7 @@ int indirect2direct(struct reiserfs_tran + + pos = + le_ih_k_offset(&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE - +- 1) * p_s_sb->s_blocksize; ++ 1) * sb->s_blocksize; + pos1 = pos; + + // we are protected by i_mutex. The tail can not disapper, not +@@ -211,18 +211,18 @@ int indirect2direct(struct reiserfs_tran + + if (path_changed(&s_ih, p_s_path)) { + /* re-search indirect item */ +- if (search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) ++ if (search_for_position_by_key(sb, p_s_item_key, p_s_path) + == POSITION_NOT_FOUND) +- reiserfs_panic(p_s_sb, "PAP-5520", ++ reiserfs_panic(sb, "PAP-5520", + "item to be converted %K does not exist", + p_s_item_key); + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); + #ifdef CONFIG_REISERFS_CHECK + pos = le_ih_k_offset(&s_ih) - 1 + + (ih_item_len(&s_ih) / UNFM_P_SIZE - +- 1) * p_s_sb->s_blocksize; ++ 1) * sb->s_blocksize; + if (pos != pos1) +- reiserfs_panic(p_s_sb, "vs-5530", "tail position " ++ reiserfs_panic(sb, "vs-5530", "tail position " + "changed while we were reading it"); + #endif + } +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -1769,12 +1769,12 @@ int journal_end_sync(struct reiserfs_tra + int journal_mark_freed(struct reiserfs_transaction_handle *, + struct super_block *, b_blocknr_t blocknr); + int journal_transaction_should_end(struct reiserfs_transaction_handle *, int); +-int 
reiserfs_in_journal(struct super_block *p_s_sb, unsigned int bmap_nr, +- int bit_nr, int searchall, b_blocknr_t *next); ++int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr, ++ int bit_nr, int searchall, b_blocknr_t *next); + int journal_begin(struct reiserfs_transaction_handle *, +- struct super_block *p_s_sb, unsigned long); ++ struct super_block *sb, unsigned long); + int journal_join_abort(struct reiserfs_transaction_handle *, +- struct super_block *p_s_sb, unsigned long); ++ struct super_block *sb, unsigned long); + void reiserfs_abort_journal(struct super_block *sb, int errno); + void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); + int reiserfs_allocate_list_bitmaps(struct super_block *s, +@@ -1830,11 +1830,11 @@ static inline void copy_key(struct reise + + int comp_items(const struct item_head *stored_ih, const struct treepath *p_s_path); + const struct reiserfs_key *get_rkey(const struct treepath *p_s_chk_path, +- const struct super_block *p_s_sb); ++ const struct super_block *sb); + int search_by_key(struct super_block *, const struct cpu_key *, + struct treepath *, int); + #define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL) +-int search_for_position_by_key(struct super_block *p_s_sb, ++int search_for_position_by_key(struct super_block *sb, + const struct cpu_key *p_s_cpu_key, + struct treepath *p_s_search_path); + extern void decrement_bcount(struct buffer_head *p_s_bh); +@@ -1978,7 +1978,7 @@ int reiserfs_global_version_in_proc(char + #define PROC_INFO_MAX( sb, field, value ) VOID_V + #define PROC_INFO_INC( sb, field ) VOID_V + #define PROC_INFO_ADD( sb, field, val ) VOID_V +-#define PROC_INFO_BH_STAT( p_s_sb, p_s_bh, n_node_level ) VOID_V ++#define PROC_INFO_BH_STAT(sb, p_s_bh, n_node_level) VOID_V + #endif + + /* dir.c */ diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_tb.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_tb.diff new 
file mode 100644 index 0000000000..c6b420e1b7 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-rename-p_s_tb.diff @@ -0,0 +1,1040 @@ +From: Jeff Mahoney +Subject: reiserfs: rename p_s_tb to tb + + This patch is a simple s/p_s_tb/tb/g to the reiserfs code. This is the fourth + in a series of patches to rip out some of the awful variable naming in + reiserfs. + +Signed-off-by: Jeff Mahoney + +--- + + fs/reiserfs/fix_node.c | 482 ++++++++++++++++++++++---------------------- + fs/reiserfs/stree.c | 21 + + include/linux/reiserfs_fs.h | 2 + 3 files changed, 254 insertions(+), 251 deletions(-) + +--- a/fs/reiserfs/fix_node.c ++++ b/fs/reiserfs/fix_node.c +@@ -749,26 +749,26 @@ else \ + -1, -1);\ + } + +-static void free_buffers_in_tb(struct tree_balance *p_s_tb) ++static void free_buffers_in_tb(struct tree_balance *tb) + { + int n_counter; + +- pathrelse(p_s_tb->tb_path); ++ pathrelse(tb->tb_path); + + for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) { +- brelse(p_s_tb->L[n_counter]); +- brelse(p_s_tb->R[n_counter]); +- brelse(p_s_tb->FL[n_counter]); +- brelse(p_s_tb->FR[n_counter]); +- brelse(p_s_tb->CFL[n_counter]); +- brelse(p_s_tb->CFR[n_counter]); +- +- p_s_tb->L[n_counter] = NULL; +- p_s_tb->R[n_counter] = NULL; +- p_s_tb->FL[n_counter] = NULL; +- p_s_tb->FR[n_counter] = NULL; +- p_s_tb->CFL[n_counter] = NULL; +- p_s_tb->CFR[n_counter] = NULL; ++ brelse(tb->L[n_counter]); ++ brelse(tb->R[n_counter]); ++ brelse(tb->FL[n_counter]); ++ brelse(tb->FR[n_counter]); ++ brelse(tb->CFL[n_counter]); ++ brelse(tb->CFR[n_counter]); ++ ++ tb->L[n_counter] = NULL; ++ tb->R[n_counter] = NULL; ++ tb->FL[n_counter] = NULL; ++ tb->FR[n_counter] = NULL; ++ tb->CFL[n_counter] = NULL; ++ tb->CFR[n_counter] = NULL; + } + } + +@@ -778,14 +778,14 @@ static void free_buffers_in_tb(struct tr + * NO_DISK_SPACE - no disk space. + */ + /* The function is NOT SCHEDULE-SAFE! 
*/ +-static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h) ++static int get_empty_nodes(struct tree_balance *tb, int n_h) + { + struct buffer_head *p_s_new_bh, +- *p_s_Sh = PATH_H_PBUFFER(p_s_tb->tb_path, n_h); ++ *p_s_Sh = PATH_H_PBUFFER(tb->tb_path, n_h); + b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; + int n_counter, n_number_of_freeblk, n_amount_needed, /* number of needed empty blocks */ + n_retval = CARRY_ON; +- struct super_block *sb = p_s_tb->tb_sb; ++ struct super_block *sb = tb->tb_sb; + + /* number_of_freeblk is the number of empty blocks which have been + acquired for use by the balancing algorithm minus the number of +@@ -803,15 +803,15 @@ static int get_empty_nodes(struct tree_b + the analysis or 0 if not restarted, then subtract the amount needed + by all of the levels of the tree below n_h. */ + /* blknum includes S[n_h], so we subtract 1 in this calculation */ +- for (n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum; ++ for (n_counter = 0, n_number_of_freeblk = tb->cur_blknum; + n_counter < n_h; n_counter++) + n_number_of_freeblk -= +- (p_s_tb->blknum[n_counter]) ? (p_s_tb->blknum[n_counter] - ++ (tb->blknum[n_counter]) ? (tb->blknum[n_counter] - + 1) : 0; + + /* Allocate missing empty blocks. */ + /* if p_s_Sh == 0 then we are getting a new root */ +- n_amount_needed = (p_s_Sh) ? (p_s_tb->blknum[n_h] - 1) : 1; ++ n_amount_needed = (p_s_Sh) ? (tb->blknum[n_h] - 1) : 1; + /* Amount_needed = the amount that we need more than the amount that we have. 
*/ + if (n_amount_needed > n_number_of_freeblk) + n_amount_needed -= n_number_of_freeblk; +@@ -819,7 +819,7 @@ static int get_empty_nodes(struct tree_b + return CARRY_ON; + + /* No need to check quota - is not allocated for blocks used for formatted nodes */ +- if (reiserfs_new_form_blocknrs(p_s_tb, a_n_blocknrs, ++ if (reiserfs_new_form_blocknrs(tb, a_n_blocknrs, + n_amount_needed) == NO_DISK_SPACE) + return NO_DISK_SPACE; + +@@ -838,14 +838,14 @@ static int get_empty_nodes(struct tree_b + p_s_new_bh); + + /* Put empty buffers into the array. */ +- RFALSE(p_s_tb->FEB[p_s_tb->cur_blknum], ++ RFALSE(tb->FEB[tb->cur_blknum], + "PAP-8141: busy slot for new buffer"); + + set_buffer_journal_new(p_s_new_bh); +- p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh; ++ tb->FEB[tb->cur_blknum++] = p_s_new_bh; + } + +- if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(p_s_tb)) ++ if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb)) + n_retval = REPEAT_SEARCH; + + return n_retval; +@@ -896,33 +896,34 @@ static int get_rfree(struct tree_balance + } + + /* Check whether left neighbor is in memory. */ +-static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h) ++static int is_left_neighbor_in_cache(struct tree_balance *tb, int n_h) + { + struct buffer_head *p_s_father, *left; +- struct super_block *sb = p_s_tb->tb_sb; ++ struct super_block *sb = tb->tb_sb; + b_blocknr_t n_left_neighbor_blocknr; + int n_left_neighbor_position; + +- if (!p_s_tb->FL[n_h]) /* Father of the left neighbor does not exist. */ ++ /* Father of the left neighbor does not exist. */ ++ if (!tb->FL[n_h]) + return 0; + + /* Calculate father of the node to be balanced. 
*/ +- p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1); ++ p_s_father = PATH_H_PBUFFER(tb->tb_path, n_h + 1); + + RFALSE(!p_s_father || + !B_IS_IN_TREE(p_s_father) || +- !B_IS_IN_TREE(p_s_tb->FL[n_h]) || ++ !B_IS_IN_TREE(tb->FL[n_h]) || + !buffer_uptodate(p_s_father) || +- !buffer_uptodate(p_s_tb->FL[n_h]), ++ !buffer_uptodate(tb->FL[n_h]), + "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", +- p_s_father, p_s_tb->FL[n_h]); ++ p_s_father, tb->FL[n_h]); + + /* Get position of the pointer to the left neighbor into the left father. */ +- n_left_neighbor_position = (p_s_father == p_s_tb->FL[n_h]) ? +- p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->FL[n_h]); ++ n_left_neighbor_position = (p_s_father == tb->FL[n_h]) ? ++ tb->lkey[n_h] : B_NR_ITEMS(tb->FL[n_h]); + /* Get left neighbor block number. */ + n_left_neighbor_blocknr = +- B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position); ++ B_N_CHILD_NUM(tb->FL[n_h], n_left_neighbor_position); + /* Look for the left neighbor in the cache. */ + if ((left = sb_find_get_block(sb, n_left_neighbor_blocknr))) { + +@@ -953,14 +954,14 @@ static void decrement_key(struct cpu_key + SCHEDULE_OCCURRED - schedule occurred while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + */ +-static int get_far_parent(struct tree_balance *p_s_tb, ++static int get_far_parent(struct tree_balance *tb, + int n_h, + struct buffer_head **pp_s_father, + struct buffer_head **pp_s_com_father, char c_lr_par) + { + struct buffer_head *p_s_parent; + INITIALIZE_PATH(s_path_to_neighbor_father); +- struct treepath *p_s_path = p_s_tb->tb_path; ++ struct treepath *p_s_path = tb->tb_path; + struct cpu_key s_lr_father_key; + int n_counter, + n_position = INT_MAX, +@@ -1005,9 +1006,9 @@ static int get_far_parent(struct tree_ba + if (n_counter == FIRST_PATH_ELEMENT_OFFSET) { + /* Check whether first buffer in the path is the root of the tree. 
*/ + if (PATH_OFFSET_PBUFFER +- (p_s_tb->tb_path, ++ (tb->tb_path, + FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == +- SB_ROOT_BLOCK(p_s_tb->tb_sb)) { ++ SB_ROOT_BLOCK(tb->tb_sb)) { + *pp_s_father = *pp_s_com_father = NULL; + return CARRY_ON; + } +@@ -1022,7 +1023,7 @@ static int get_far_parent(struct tree_ba + + if (buffer_locked(*pp_s_com_father)) { + __wait_on_buffer(*pp_s_com_father); +- if (FILESYSTEM_CHANGED_TB(p_s_tb)) { ++ if (FILESYSTEM_CHANGED_TB(tb)) { + brelse(*pp_s_com_father); + return REPEAT_SEARCH; + } +@@ -1035,9 +1036,9 @@ static int get_far_parent(struct tree_ba + le_key2cpu_key(&s_lr_father_key, + B_N_PDELIM_KEY(*pp_s_com_father, + (c_lr_par == +- LEFT_PARENTS) ? (p_s_tb->lkey[n_h - 1] = ++ LEFT_PARENTS) ? (tb->lkey[n_h - 1] = + n_position - +- 1) : (p_s_tb->rkey[n_h - ++ 1) : (tb->rkey[n_h - + 1] = + n_position))); + +@@ -1045,12 +1046,12 @@ static int get_far_parent(struct tree_ba + decrement_key(&s_lr_father_key); + + if (search_by_key +- (p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, ++ (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, + n_h + 1) == IO_ERROR) + // path is released + return IO_ERROR; + +- if (FILESYSTEM_CHANGED_TB(p_s_tb)) { ++ if (FILESYSTEM_CHANGED_TB(tb)) { + pathrelse(&s_path_to_neighbor_father); + brelse(*pp_s_com_father); + return REPEAT_SEARCH; +@@ -1075,24 +1076,26 @@ static int get_far_parent(struct tree_ba + * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + */ +-static int get_parents(struct tree_balance *p_s_tb, int n_h) ++static int get_parents(struct tree_balance *tb, int n_h) + { +- struct treepath *p_s_path = p_s_tb->tb_path; ++ struct treepath *p_s_path = tb->tb_path; + int n_position, + n_ret_value, +- n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); ++ n_path_offset = PATH_H_PATH_OFFSET(tb->tb_path, n_h); + struct buffer_head *p_s_curf, *p_s_curcf; + + /* Current node is the root of the tree 
or will be root of the tree */ + if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) { + /* The root can not have parents. + Release nodes which previously were obtained as parents of the current node neighbors. */ +- brelse(p_s_tb->FL[n_h]); +- brelse(p_s_tb->CFL[n_h]); +- brelse(p_s_tb->FR[n_h]); +- brelse(p_s_tb->CFR[n_h]); +- p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] = +- p_s_tb->CFR[n_h] = NULL; ++ brelse(tb->FL[n_h]); ++ brelse(tb->CFL[n_h]); ++ brelse(tb->FR[n_h]); ++ brelse(tb->CFR[n_h]); ++ tb->FL[n_h] = NULL; ++ tb->CFL[n_h] = NULL; ++ tb->FR[n_h] = NULL; ++ tb->CFR[n_h] = NULL; + return CARRY_ON; + } + +@@ -1104,22 +1107,22 @@ static int get_parents(struct tree_balan + PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); + get_bh(p_s_curf); + get_bh(p_s_curf); +- p_s_tb->lkey[n_h] = n_position - 1; ++ tb->lkey[n_h] = n_position - 1; + } else { + /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node. + Calculate current common parent of L[n_path_offset] and the current node. Note that + CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset]. + Calculate lkey[n_path_offset]. */ +- if ((n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, ++ if ((n_ret_value = get_far_parent(tb, n_h + 1, &p_s_curf, + &p_s_curcf, + LEFT_PARENTS)) != CARRY_ON) + return n_ret_value; + } + +- brelse(p_s_tb->FL[n_h]); +- p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */ +- brelse(p_s_tb->CFL[n_h]); +- p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */ ++ brelse(tb->FL[n_h]); ++ tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */ ++ brelse(tb->CFL[n_h]); ++ tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */ + + RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) || + (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)), +@@ -1133,7 +1136,7 @@ static int get_parents(struct tree_balan + Calculate current common parent of R[n_h] and current node. 
Note that CFR[n_h] + not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */ + if ((n_ret_value = +- get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf, ++ get_far_parent(tb, n_h + 1, &p_s_curf, &p_s_curcf, + RIGHT_PARENTS)) != CARRY_ON) + return n_ret_value; + } else { +@@ -1143,14 +1146,16 @@ static int get_parents(struct tree_balan + PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); + get_bh(p_s_curf); + get_bh(p_s_curf); +- p_s_tb->rkey[n_h] = n_position; ++ tb->rkey[n_h] = n_position; + } + +- brelse(p_s_tb->FR[n_h]); +- p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */ ++ brelse(tb->FR[n_h]); ++ /* New initialization of FR[n_path_offset]. */ ++ tb->FR[n_h] = p_s_curf; + +- brelse(p_s_tb->CFR[n_h]); +- p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. */ ++ brelse(tb->CFR[n_h]); ++ /* New initialization of CFR[n_path_offset]. */ ++ tb->CFR[n_h] = p_s_curcf; + + RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) || + (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)), +@@ -1885,12 +1890,12 @@ static int check_balance(int mode, + } + + /* Check whether parent at the path is the really parent of the current node.*/ +-static int get_direct_parent(struct tree_balance *p_s_tb, int n_h) ++static int get_direct_parent(struct tree_balance *tb, int n_h) + { + struct buffer_head *bh; +- struct treepath *p_s_path = p_s_tb->tb_path; ++ struct treepath *p_s_path = tb->tb_path; + int n_position, +- n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); ++ n_path_offset = PATH_H_PATH_OFFSET(tb->tb_path, n_h); + + /* We are in the root or in the new root. */ + if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) { +@@ -1899,7 +1904,7 @@ static int get_direct_parent(struct tree + "PAP-8260: invalid offset in the path"); + + if (PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)-> +- b_blocknr == SB_ROOT_BLOCK(p_s_tb->tb_sb)) { ++ b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) { + /* Root is not changed. 
*/ + PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL; + PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0; +@@ -1924,7 +1929,7 @@ static int get_direct_parent(struct tree + + if (buffer_locked(bh)) { + __wait_on_buffer(bh); +- if (FILESYSTEM_CHANGED_TB(p_s_tb)) ++ if (FILESYSTEM_CHANGED_TB(tb)) + return REPEAT_SEARCH; + } + +@@ -1937,85 +1942,86 @@ static int get_direct_parent(struct tree + * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + */ +-static int get_neighbors(struct tree_balance *p_s_tb, int n_h) ++static int get_neighbors(struct tree_balance *tb, int n_h) + { + int n_child_position, +- n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1); ++ n_path_offset = PATH_H_PATH_OFFSET(tb->tb_path, n_h + 1); + unsigned long n_son_number; +- struct super_block *sb = p_s_tb->tb_sb; ++ struct super_block *sb = tb->tb_sb; + struct buffer_head *bh; + + PROC_INFO_INC(sb, get_neighbors[n_h]); + +- if (p_s_tb->lnum[n_h]) { ++ if (tb->lnum[n_h]) { + /* We need left neighbor to balance S[n_h]. */ + PROC_INFO_INC(sb, need_l_neighbor[n_h]); +- bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); ++ bh = PATH_OFFSET_PBUFFER(tb->tb_path, n_path_offset); + +- RFALSE(bh == p_s_tb->FL[n_h] && +- !PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset), ++ RFALSE(bh == tb->FL[n_h] && ++ !PATH_OFFSET_POSITION(tb->tb_path, n_path_offset), + "PAP-8270: invalid position in the parent"); + + n_child_position = + (bh == +- p_s_tb->FL[n_h]) ? p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb-> ++ tb->FL[n_h]) ? 
tb->lkey[n_h] : B_NR_ITEMS(tb-> + FL[n_h]); +- n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position); ++ n_son_number = B_N_CHILD_NUM(tb->FL[n_h], n_child_position); + bh = sb_bread(sb, n_son_number); + if (!bh) + return IO_ERROR; +- if (FILESYSTEM_CHANGED_TB(p_s_tb)) { ++ if (FILESYSTEM_CHANGED_TB(tb)) { + brelse(bh); + PROC_INFO_INC(sb, get_neighbors_restart[n_h]); + return REPEAT_SEARCH; + } + +- RFALSE(!B_IS_IN_TREE(p_s_tb->FL[n_h]) || +- n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) || +- B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) != ++ RFALSE(!B_IS_IN_TREE(tb->FL[n_h]) || ++ n_child_position > B_NR_ITEMS(tb->FL[n_h]) || ++ B_N_CHILD_NUM(tb->FL[n_h], n_child_position) != + bh->b_blocknr, "PAP-8275: invalid parent"); + RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child"); + RFALSE(!n_h && + B_FREE_SPACE(bh) != + MAX_CHILD_SIZE(bh) - +- dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)), ++ dc_size(B_N_CHILD(tb->FL[0], n_child_position)), + "PAP-8290: invalid child size of left neighbor"); + +- brelse(p_s_tb->L[n_h]); +- p_s_tb->L[n_h] = bh; ++ brelse(tb->L[n_h]); ++ tb->L[n_h] = bh; + } + +- if (p_s_tb->rnum[n_h]) { /* We need right neighbor to balance S[n_path_offset]. */ ++ /* We need right neighbor to balance S[n_path_offset]. */ ++ if (tb->rnum[n_h]) { + PROC_INFO_INC(sb, need_r_neighbor[n_h]); +- bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); ++ bh = PATH_OFFSET_PBUFFER(tb->tb_path, n_path_offset); + +- RFALSE(bh == p_s_tb->FR[n_h] && +- PATH_OFFSET_POSITION(p_s_tb->tb_path, ++ RFALSE(bh == tb->FR[n_h] && ++ PATH_OFFSET_POSITION(tb->tb_path, + n_path_offset) >= + B_NR_ITEMS(bh), + "PAP-8295: invalid position in the parent"); + + n_child_position = +- (bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0; +- n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position); ++ (bh == tb->FR[n_h]) ? 
tb->rkey[n_h] + 1 : 0; ++ n_son_number = B_N_CHILD_NUM(tb->FR[n_h], n_child_position); + bh = sb_bread(sb, n_son_number); + if (!bh) + return IO_ERROR; +- if (FILESYSTEM_CHANGED_TB(p_s_tb)) { ++ if (FILESYSTEM_CHANGED_TB(tb)) { + brelse(bh); + PROC_INFO_INC(sb, get_neighbors_restart[n_h]); + return REPEAT_SEARCH; + } +- brelse(p_s_tb->R[n_h]); +- p_s_tb->R[n_h] = bh; ++ brelse(tb->R[n_h]); ++ tb->R[n_h] = bh; + + RFALSE(!n_h + && B_FREE_SPACE(bh) != + MAX_CHILD_SIZE(bh) - +- dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)), ++ dc_size(B_N_CHILD(tb->FR[0], n_child_position)), + "PAP-8300: invalid child size of right neighbor (%d != %d - %d)", + B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh), +- dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position))); ++ dc_size(B_N_CHILD(tb->FR[0], n_child_position))); + + } + return CARRY_ON; +@@ -2139,7 +2145,7 @@ static int clear_all_dirty_bits(struct s + return reiserfs_prepare_for_journal(s, bh, 0); + } + +-static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb) ++static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) + { + struct buffer_head *locked; + #ifdef CONFIG_REISERFS_CHECK +@@ -2151,95 +2157,94 @@ static int wait_tb_buffers_until_unlocke + + locked = NULL; + +- for (i = p_s_tb->tb_path->path_length; ++ for (i = tb->tb_path->path_length; + !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { +- if (PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { ++ if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) { + /* if I understand correctly, we can only be sure the last buffer + ** in the path is in the tree --clm + */ + #ifdef CONFIG_REISERFS_CHECK +- if (PATH_PLAST_BUFFER(p_s_tb->tb_path) == +- PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { +- tb_buffer_sanity_check(p_s_tb->tb_sb, ++ if (PATH_PLAST_BUFFER(tb->tb_path) == ++ PATH_OFFSET_PBUFFER(tb->tb_path, i)) ++ tb_buffer_sanity_check(tb->tb_sb, + PATH_OFFSET_PBUFFER +- (p_s_tb->tb_path, ++ (tb->tb_path, + i), "S", +- p_s_tb->tb_path-> ++ tb->tb_path-> + path_length - i); +- } + 
#endif +- if (!clear_all_dirty_bits(p_s_tb->tb_sb, ++ if (!clear_all_dirty_bits(tb->tb_sb, + PATH_OFFSET_PBUFFER +- (p_s_tb->tb_path, ++ (tb->tb_path, + i))) { + locked = +- PATH_OFFSET_PBUFFER(p_s_tb->tb_path, ++ PATH_OFFSET_PBUFFER(tb->tb_path, + i); + } + } + } + +- for (i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i]; ++ for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i]; + i++) { + +- if (p_s_tb->lnum[i]) { ++ if (tb->lnum[i]) { + +- if (p_s_tb->L[i]) { +- tb_buffer_sanity_check(p_s_tb->tb_sb, +- p_s_tb->L[i], ++ if (tb->L[i]) { ++ tb_buffer_sanity_check(tb->tb_sb, ++ tb->L[i], + "L", i); + if (!clear_all_dirty_bits +- (p_s_tb->tb_sb, p_s_tb->L[i])) +- locked = p_s_tb->L[i]; ++ (tb->tb_sb, tb->L[i])) ++ locked = tb->L[i]; + } + +- if (!locked && p_s_tb->FL[i]) { +- tb_buffer_sanity_check(p_s_tb->tb_sb, +- p_s_tb->FL[i], ++ if (!locked && tb->FL[i]) { ++ tb_buffer_sanity_check(tb->tb_sb, ++ tb->FL[i], + "FL", i); + if (!clear_all_dirty_bits +- (p_s_tb->tb_sb, p_s_tb->FL[i])) +- locked = p_s_tb->FL[i]; ++ (tb->tb_sb, tb->FL[i])) ++ locked = tb->FL[i]; + } + +- if (!locked && p_s_tb->CFL[i]) { +- tb_buffer_sanity_check(p_s_tb->tb_sb, +- p_s_tb->CFL[i], ++ if (!locked && tb->CFL[i]) { ++ tb_buffer_sanity_check(tb->tb_sb, ++ tb->CFL[i], + "CFL", i); + if (!clear_all_dirty_bits +- (p_s_tb->tb_sb, p_s_tb->CFL[i])) +- locked = p_s_tb->CFL[i]; ++ (tb->tb_sb, tb->CFL[i])) ++ locked = tb->CFL[i]; + } + + } + +- if (!locked && (p_s_tb->rnum[i])) { ++ if (!locked && (tb->rnum[i])) { + +- if (p_s_tb->R[i]) { +- tb_buffer_sanity_check(p_s_tb->tb_sb, +- p_s_tb->R[i], ++ if (tb->R[i]) { ++ tb_buffer_sanity_check(tb->tb_sb, ++ tb->R[i], + "R", i); + if (!clear_all_dirty_bits +- (p_s_tb->tb_sb, p_s_tb->R[i])) +- locked = p_s_tb->R[i]; ++ (tb->tb_sb, tb->R[i])) ++ locked = tb->R[i]; + } + +- if (!locked && p_s_tb->FR[i]) { +- tb_buffer_sanity_check(p_s_tb->tb_sb, +- p_s_tb->FR[i], ++ if (!locked && tb->FR[i]) { ++ tb_buffer_sanity_check(tb->tb_sb, ++ 
tb->FR[i], + "FR", i); + if (!clear_all_dirty_bits +- (p_s_tb->tb_sb, p_s_tb->FR[i])) +- locked = p_s_tb->FR[i]; ++ (tb->tb_sb, tb->FR[i])) ++ locked = tb->FR[i]; + } + +- if (!locked && p_s_tb->CFR[i]) { +- tb_buffer_sanity_check(p_s_tb->tb_sb, +- p_s_tb->CFR[i], ++ if (!locked && tb->CFR[i]) { ++ tb_buffer_sanity_check(tb->tb_sb, ++ tb->CFR[i], + "CFR", i); + if (!clear_all_dirty_bits +- (p_s_tb->tb_sb, p_s_tb->CFR[i])) +- locked = p_s_tb->CFR[i]; ++ (tb->tb_sb, tb->CFR[i])) ++ locked = tb->CFR[i]; + } + } + } +@@ -2252,10 +2257,10 @@ static int wait_tb_buffers_until_unlocke + ** --clm + */ + for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { +- if (p_s_tb->FEB[i]) { ++ if (tb->FEB[i]) { + if (!clear_all_dirty_bits +- (p_s_tb->tb_sb, p_s_tb->FEB[i])) +- locked = p_s_tb->FEB[i]; ++ (tb->tb_sb, tb->FEB[i])) ++ locked = tb->FEB[i]; + } + } + +@@ -2263,21 +2268,20 @@ static int wait_tb_buffers_until_unlocke + #ifdef CONFIG_REISERFS_CHECK + repeat_counter++; + if ((repeat_counter % 10000) == 0) { +- reiserfs_warning(p_s_tb->tb_sb, "reiserfs-8200", ++ reiserfs_warning(tb->tb_sb, "reiserfs-8200", + "too many iterations waiting " + "for buffer to unlock " + "(%b)", locked); + + /* Don't loop forever. Try to recover from possible error. */ + +- return (FILESYSTEM_CHANGED_TB(p_s_tb)) ? ++ return (FILESYSTEM_CHANGED_TB(tb)) ? 
+ REPEAT_SEARCH : CARRY_ON; + } + #endif + __wait_on_buffer(locked); +- if (FILESYSTEM_CHANGED_TB(p_s_tb)) { ++ if (FILESYSTEM_CHANGED_TB(tb)) + return REPEAT_SEARCH; +- } + } + + } while (locked); +@@ -2307,138 +2311,136 @@ static int wait_tb_buffers_until_unlocke + * tb tree_balance structure; + * inum item number in S[h]; + * pos_in_item - comment this if you can +- * ins_ih & ins_sd are used when inserting ++ * ins_ih item head of item being inserted ++ * data inserted item or data to be pasted + * Returns: 1 - schedule occurred while the function worked; + * 0 - schedule didn't occur while the function worked; + * -1 - if no_disk_space + */ + +-int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_ins_ih, // item head of item being inserted +- const void *data // inserted item or data to be pasted +- ) ++int fix_nodes(int n_op_mode, struct tree_balance *tb, ++ struct item_head *p_s_ins_ih, const void *data) + { +- int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path); ++ int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(tb->tb_path); + int n_pos_in_item; + + /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared + ** during wait_tb_buffers_run + */ + int wait_tb_buffers_run = 0; +- struct buffer_head *p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path); ++ struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); + +- ++REISERFS_SB(p_s_tb->tb_sb)->s_fix_nodes; ++ ++REISERFS_SB(tb->tb_sb)->s_fix_nodes; + +- n_pos_in_item = p_s_tb->tb_path->pos_in_item; ++ n_pos_in_item = tb->tb_path->pos_in_item; + +- p_s_tb->fs_gen = get_generation(p_s_tb->tb_sb); ++ tb->fs_gen = get_generation(tb->tb_sb); + + /* we prepare and log the super here so it will already be in the + ** transaction when do_balance needs to change it. 
+ ** This way do_balance won't have to schedule when trying to prepare + ** the super for logging + */ +- reiserfs_prepare_for_journal(p_s_tb->tb_sb, +- SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1); +- journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb, +- SB_BUFFER_WITH_SB(p_s_tb->tb_sb)); +- if (FILESYSTEM_CHANGED_TB(p_s_tb)) ++ reiserfs_prepare_for_journal(tb->tb_sb, ++ SB_BUFFER_WITH_SB(tb->tb_sb), 1); ++ journal_mark_dirty(tb->transaction_handle, tb->tb_sb, ++ SB_BUFFER_WITH_SB(tb->tb_sb)); ++ if (FILESYSTEM_CHANGED_TB(tb)) + return REPEAT_SEARCH; + + /* if it possible in indirect_to_direct conversion */ +- if (buffer_locked(p_s_tbS0)) { +- __wait_on_buffer(p_s_tbS0); +- if (FILESYSTEM_CHANGED_TB(p_s_tb)) ++ if (buffer_locked(tbS0)) { ++ __wait_on_buffer(tbS0); ++ if (FILESYSTEM_CHANGED_TB(tb)) + return REPEAT_SEARCH; + } + #ifdef CONFIG_REISERFS_CHECK + if (cur_tb) { + print_cur_tb("fix_nodes"); +- reiserfs_panic(p_s_tb->tb_sb, "PAP-8305", ++ reiserfs_panic(tb->tb_sb, "PAP-8305", + "there is pending do_balance"); + } + +- if (!buffer_uptodate(p_s_tbS0) || !B_IS_IN_TREE(p_s_tbS0)) { +- reiserfs_panic(p_s_tb->tb_sb, "PAP-8320", "S[0] (%b %z) is " ++ if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0)) ++ reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is " + "not uptodate at the beginning of fix_nodes " + "or not in tree (mode %c)", +- p_s_tbS0, p_s_tbS0, n_op_mode); +- } ++ tbS0, tbS0, n_op_mode); + + /* Check parameters. 
*/ + switch (n_op_mode) { + case M_INSERT: +- if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0)) +- reiserfs_panic(p_s_tb->tb_sb, "PAP-8330", "Incorrect " ++ if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(tbS0)) ++ reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect " + "item number %d (in S0 - %d) in case " + "of insert", n_item_num, +- B_NR_ITEMS(p_s_tbS0)); ++ B_NR_ITEMS(tbS0)); + break; + case M_PASTE: + case M_DELETE: + case M_CUT: +- if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0)) { +- print_block(p_s_tbS0, 0, -1, -1); +- reiserfs_panic(p_s_tb->tb_sb, "PAP-8335", "Incorrect " ++ if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(tbS0)) { ++ print_block(tbS0, 0, -1, -1); ++ reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect " + "item number(%d); mode = %c " + "insert_size = %d", + n_item_num, n_op_mode, +- p_s_tb->insert_size[0]); ++ tb->insert_size[0]); + } + break; + default: +- reiserfs_panic(p_s_tb->tb_sb, "PAP-8340", "Incorrect mode " ++ reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode " + "of operation"); + } + #endif + +- if (get_mem_for_virtual_node(p_s_tb) == REPEAT_SEARCH) ++ if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH) + // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat + return REPEAT_SEARCH; + + /* Starting from the leaf level; for all levels n_h of the tree. */ +- for (n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++) { +- if ((n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON) { ++ for (n_h = 0; n_h < MAX_HEIGHT && tb->insert_size[n_h]; n_h++) { ++ n_ret_value = get_direct_parent(tb, n_h); ++ if (n_ret_value != CARRY_ON) + goto repeat; +- } + +- if ((n_ret_value = +- check_balance(n_op_mode, p_s_tb, n_h, n_item_num, +- n_pos_in_item, p_s_ins_ih, +- data)) != CARRY_ON) { ++ n_ret_value = check_balance(n_op_mode, tb, n_h, n_item_num, ++ n_pos_in_item, p_s_ins_ih, data); ++ if (n_ret_value != CARRY_ON) { + if (n_ret_value == NO_BALANCING_NEEDED) { + /* No balancing for higher levels needed. 
*/ +- if ((n_ret_value = +- get_neighbors(p_s_tb, n_h)) != CARRY_ON) { ++ n_ret_value = get_neighbors(tb, n_h); ++ if (n_ret_value != CARRY_ON) + goto repeat; +- } + if (n_h != MAX_HEIGHT - 1) +- p_s_tb->insert_size[n_h + 1] = 0; ++ tb->insert_size[n_h + 1] = 0; + /* ok, analysis and resource gathering are complete */ + break; + } + goto repeat; + } + +- if ((n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON) { ++ n_ret_value = get_neighbors(tb, n_h); ++ if (n_ret_value != CARRY_ON) + goto repeat; +- } + +- if ((n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON) { +- goto repeat; /* No disk space, or schedule occurred and +- analysis may be invalid and needs to be redone. */ +- } ++ /* No disk space, or schedule occurred and analysis may be ++ * invalid and needs to be redone. */ ++ n_ret_value = get_empty_nodes(tb, n_h); ++ if (n_ret_value != CARRY_ON) ++ goto repeat; + +- if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h)) { ++ if (!PATH_H_PBUFFER(tb->tb_path, n_h)) { + /* We have a positive insert size but no nodes exist on this + level, this means that we are creating a new root. 
*/ + +- RFALSE(p_s_tb->blknum[n_h] != 1, ++ RFALSE(tb->blknum[n_h] != 1, + "PAP-8350: creating new empty root"); + + if (n_h < MAX_HEIGHT - 1) +- p_s_tb->insert_size[n_h + 1] = 0; +- } else if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1)) { +- if (p_s_tb->blknum[n_h] > 1) { ++ tb->insert_size[n_h + 1] = 0; ++ } else if (!PATH_H_PBUFFER(tb->tb_path, n_h + 1)) { ++ if (tb->blknum[n_h] > 1) { + /* The tree needs to be grown, so this node S[n_h] + which is the root node is split into two nodes, + and a new node (S[n_h+1]) will be created to +@@ -2447,19 +2449,20 @@ int fix_nodes(int n_op_mode, struct tree + RFALSE(n_h == MAX_HEIGHT - 1, + "PAP-8355: attempt to create too high of a tree"); + +- p_s_tb->insert_size[n_h + 1] = ++ tb->insert_size[n_h + 1] = + (DC_SIZE + +- KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) + ++ KEY_SIZE) * (tb->blknum[n_h] - 1) + + DC_SIZE; + } else if (n_h < MAX_HEIGHT - 1) +- p_s_tb->insert_size[n_h + 1] = 0; ++ tb->insert_size[n_h + 1] = 0; + } else +- p_s_tb->insert_size[n_h + 1] = +- (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1); ++ tb->insert_size[n_h + 1] = ++ (DC_SIZE + KEY_SIZE) * (tb->blknum[n_h] - 1); + } + +- if ((n_ret_value = wait_tb_buffers_until_unlocked(p_s_tb)) == CARRY_ON) { +- if (FILESYSTEM_CHANGED_TB(p_s_tb)) { ++ n_ret_value = wait_tb_buffers_until_unlocked(tb); ++ if (n_ret_value == CARRY_ON) { ++ if (FILESYSTEM_CHANGED_TB(tb)) { + wait_tb_buffers_run = 1; + n_ret_value = REPEAT_SEARCH; + goto repeat; +@@ -2482,50 +2485,49 @@ int fix_nodes(int n_op_mode, struct tree + + /* Release path buffers. 
*/ + if (wait_tb_buffers_run) { +- pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path); ++ pathrelse_and_restore(tb->tb_sb, tb->tb_path); + } else { +- pathrelse(p_s_tb->tb_path); ++ pathrelse(tb->tb_path); + } + /* brelse all resources collected for balancing */ + for (i = 0; i < MAX_HEIGHT; i++) { + if (wait_tb_buffers_run) { +- reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, +- p_s_tb->L[i]); +- reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, +- p_s_tb->R[i]); +- reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, +- p_s_tb->FL[i]); +- reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, +- p_s_tb->FR[i]); +- reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, +- p_s_tb-> ++ reiserfs_restore_prepared_buffer(tb->tb_sb, ++ tb->L[i]); ++ reiserfs_restore_prepared_buffer(tb->tb_sb, ++ tb->R[i]); ++ reiserfs_restore_prepared_buffer(tb->tb_sb, ++ tb->FL[i]); ++ reiserfs_restore_prepared_buffer(tb->tb_sb, ++ tb->FR[i]); ++ reiserfs_restore_prepared_buffer(tb->tb_sb, ++ tb-> + CFL[i]); +- reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, +- p_s_tb-> ++ reiserfs_restore_prepared_buffer(tb->tb_sb, ++ tb-> + CFR[i]); + } + +- brelse(p_s_tb->L[i]); +- brelse(p_s_tb->R[i]); +- brelse(p_s_tb->FL[i]); +- brelse(p_s_tb->FR[i]); +- brelse(p_s_tb->CFL[i]); +- brelse(p_s_tb->CFR[i]); +- +- p_s_tb->L[i] = NULL; +- p_s_tb->R[i] = NULL; +- p_s_tb->FL[i] = NULL; +- p_s_tb->FR[i] = NULL; +- p_s_tb->CFL[i] = NULL; +- p_s_tb->CFR[i] = NULL; ++ brelse(tb->L[i]); ++ brelse(tb->R[i]); ++ brelse(tb->FL[i]); ++ brelse(tb->FR[i]); ++ brelse(tb->CFL[i]); ++ brelse(tb->CFR[i]); ++ ++ tb->L[i] = NULL; ++ tb->R[i] = NULL; ++ tb->FL[i] = NULL; ++ tb->FR[i] = NULL; ++ tb->CFL[i] = NULL; ++ tb->CFR[i] = NULL; + } + + if (wait_tb_buffers_run) { + for (i = 0; i < MAX_FEB_SIZE; i++) { +- if (p_s_tb->FEB[i]) { ++ if (tb->FEB[i]) + reiserfs_restore_prepared_buffer +- (p_s_tb->tb_sb, p_s_tb->FEB[i]); +- } ++ (tb->tb_sb, tb->FEB[i]); + } + } + return n_ret_value; +@@ -2533,7 +2535,7 @@ int fix_nodes(int n_op_mode, 
struct tree + + } + +-/* Anatoly will probably forgive me renaming p_s_tb to tb. I just ++/* Anatoly will probably forgive me renaming tb to tb. I just + wanted to make lines shorter */ + void unfix_nodes(struct tree_balance *tb) + { +--- a/fs/reiserfs/stree.c ++++ b/fs/reiserfs/stree.c +@@ -1063,17 +1063,17 @@ static char prepare_for_delete_or_cut(st + } + + /* Calculate number of bytes which will be deleted or cut during balance */ +-static int calc_deleted_bytes_number(struct tree_balance *p_s_tb, char c_mode) ++static int calc_deleted_bytes_number(struct tree_balance *tb, char c_mode) + { + int n_del_size; +- struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path); ++ struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path); + + if (is_statdata_le_ih(p_le_ih)) + return 0; + + n_del_size = + (c_mode == +- M_DELETE) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0]; ++ M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0]; + if (is_direntry_le_ih(p_le_ih)) { + // return EMPTY_DIR_SIZE; /* We delete emty directoris only. 
*/ + // we can't use EMPTY_DIR_SIZE, as old format dirs have a different +@@ -1083,25 +1083,26 @@ static int calc_deleted_bytes_number(str + } + + if (is_indirect_le_ih(p_le_ih)) +- n_del_size = (n_del_size / UNFM_P_SIZE) * (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size); // - get_ih_free_space (p_le_ih); ++ n_del_size = (n_del_size / UNFM_P_SIZE) * ++ (PATH_PLAST_BUFFER(tb->tb_path)->b_size); + return n_del_size; + } + + static void init_tb_struct(struct reiserfs_transaction_handle *th, +- struct tree_balance *p_s_tb, ++ struct tree_balance *tb, + struct super_block *sb, + struct treepath *p_s_path, int n_size) + { + + BUG_ON(!th->t_trans_id); + +- memset(p_s_tb, '\0', sizeof(struct tree_balance)); +- p_s_tb->transaction_handle = th; +- p_s_tb->tb_sb = sb; +- p_s_tb->tb_path = p_s_path; ++ memset(tb, '\0', sizeof(struct tree_balance)); ++ tb->transaction_handle = th; ++ tb->tb_sb = sb; ++ tb->tb_path = p_s_path; + PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; + PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; +- p_s_tb->insert_size[0] = n_size; ++ tb->insert_size[0] = n_size; + } + + void padd_item(char *item, int total_length, int length) +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -2004,7 +2004,7 @@ extern const struct address_space_operat + + /* fix_nodes.c */ + +-int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, ++int fix_nodes(int n_op_mode, struct tree_balance *tb, + struct item_head *p_s_ins_ih, const void *); + void unfix_nodes(struct tree_balance *); + diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-selinux.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-selinux.diff new file mode 100644 index 0000000000..c270b50d81 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-selinux.diff @@ -0,0 +1,316 @@ +From: Jeff Mahoney +Subject: reiserfs: add atomic addition of selinux attributes during inode creation + + Some time ago, some changes were made to 
make security inode attributes + be atomically written during inode creation. ReiserFS fell behind in this + area, but with the reworking of the xattr code, it's now fairly easy to add. + + The following patch adds the ability for security attributes to be added + automatically during inode creation. + +Signed-off-by: Jeff Mahoney + +-- + fs/reiserfs/inode.c | 16 +++++++++++- + fs/reiserfs/namei.c | 37 +++++++++++++++++++++++++--- + fs/reiserfs/xattr_security.c | 54 +++++++++++++++++++++++++++++++++++++++++ + include/linux/reiserfs_fs.h | 4 ++- + include/linux/reiserfs_xattr.h | 32 ++++++++++++++++++++++++ + 5 files changed, 137 insertions(+), 6 deletions(-) + +--- a/fs/reiserfs/inode.c ++++ b/fs/reiserfs/inode.c +@@ -1756,7 +1756,8 @@ int reiserfs_new_inode(struct reiserfs_t + /* 0 for regular, EMTRY_DIR_SIZE for dirs, + strlen (symname) for symlinks) */ + loff_t i_size, struct dentry *dentry, +- struct inode *inode) ++ struct inode *inode, ++ struct reiserfs_security_handle *security) + { + struct super_block *sb; + INITIALIZE_PATH(path_to_key); +@@ -1934,6 +1935,19 @@ int reiserfs_new_inode(struct reiserfs_t + } else if (IS_PRIVATE(dir)) + inode->i_flags |= S_PRIVATE; + ++ if (security->name) { ++ retval = reiserfs_security_write(th, inode, security); ++ if (retval) { ++ err = retval; ++ reiserfs_check_path(&path_to_key); ++ retval = journal_end(th, th->t_super, ++ th->t_blocks_allocated); ++ if (retval) ++ err = retval; ++ goto out_inserted_sd; ++ } ++ } ++ + insert_inode_hash(inode); + reiserfs_update_sd(th, inode); + reiserfs_check_path(&path_to_key); +--- a/fs/reiserfs/namei.c ++++ b/fs/reiserfs/namei.c +@@ -607,6 +607,7 @@ static int reiserfs_create(struct inode + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + + REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); + struct reiserfs_transaction_handle th; ++ struct reiserfs_security_handle security; + + if (!(inode = new_inode(dir->i_sb))) { + return -ENOMEM; +@@ -614,6 +615,12 @@ static int reiserfs_create(struct inode + 
new_inode_init(inode, dir, mode); + + jbegin_count += reiserfs_cache_default_acl(dir); ++ retval = reiserfs_security_init(dir, inode, &security); ++ if (retval < 0) { ++ drop_new_inode(inode); ++ return retval; ++ } ++ jbegin_count += retval; + reiserfs_write_lock(dir->i_sb); + + retval = journal_begin(&th, dir->i_sb, jbegin_count); +@@ -624,7 +631,7 @@ static int reiserfs_create(struct inode + + retval = + reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry, +- inode); ++ inode, &security); + if (retval) + goto out_failed; + +@@ -662,6 +669,7 @@ static int reiserfs_mknod(struct inode * + int retval; + struct inode *inode; + struct reiserfs_transaction_handle th; ++ struct reiserfs_security_handle security; + /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ + int jbegin_count = + JOURNAL_PER_BALANCE_CNT * 3 + +@@ -677,6 +685,12 @@ static int reiserfs_mknod(struct inode * + new_inode_init(inode, dir, mode); + + jbegin_count += reiserfs_cache_default_acl(dir); ++ retval = reiserfs_security_init(dir, inode, &security); ++ if (retval < 0) { ++ drop_new_inode(inode); ++ return retval; ++ } ++ jbegin_count += retval; + reiserfs_write_lock(dir->i_sb); + + retval = journal_begin(&th, dir->i_sb, jbegin_count); +@@ -687,7 +701,7 @@ static int reiserfs_mknod(struct inode * + + retval = + reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry, +- inode); ++ inode, &security); + if (retval) { + goto out_failed; + } +@@ -728,6 +742,7 @@ static int reiserfs_mkdir(struct inode * + int retval; + struct inode *inode; + struct reiserfs_transaction_handle th; ++ struct reiserfs_security_handle security; + /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ + int jbegin_count = + JOURNAL_PER_BALANCE_CNT * 3 + +@@ -745,6 +760,12 @@ static int reiserfs_mkdir(struct inode * + new_inode_init(inode, dir, mode); + + jbegin_count += 
reiserfs_cache_default_acl(dir); ++ retval = reiserfs_security_init(dir, inode, &security); ++ if (retval < 0) { ++ drop_new_inode(inode); ++ return retval; ++ } ++ jbegin_count += retval; + reiserfs_write_lock(dir->i_sb); + + retval = journal_begin(&th, dir->i_sb, jbegin_count); +@@ -761,7 +782,7 @@ static int reiserfs_mkdir(struct inode * + retval = reiserfs_new_inode(&th, dir, mode, NULL /*symlink */ , + old_format_only(dir->i_sb) ? + EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, +- dentry, inode); ++ dentry, inode, &security); + if (retval) { + dir->i_nlink--; + goto out_failed; +@@ -1002,6 +1023,7 @@ static int reiserfs_symlink(struct inode + char *name; + int item_len; + struct reiserfs_transaction_handle th; ++ struct reiserfs_security_handle security; + int mode = S_IFLNK | S_IRWXUGO; + /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ + int jbegin_count = +@@ -1014,6 +1036,13 @@ static int reiserfs_symlink(struct inode + } + new_inode_init(inode, parent_dir, mode); + ++ retval = reiserfs_security_init(parent_dir, inode, &security); ++ if (retval < 0) { ++ drop_new_inode(inode); ++ return retval; ++ } ++ jbegin_count += retval; ++ + reiserfs_write_lock(parent_dir->i_sb); + item_len = ROUND_UP(strlen(symname)); + if (item_len > MAX_DIRECT_ITEM_LEN(parent_dir->i_sb->s_blocksize)) { +@@ -1040,7 +1069,7 @@ static int reiserfs_symlink(struct inode + + retval = + reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname), +- dentry, inode); ++ dentry, inode, &security); + kfree(name); + if (retval) { /* reiserfs_new_inode iputs for us */ + goto out_failed; +--- a/fs/reiserfs/xattr_security.c ++++ b/fs/reiserfs/xattr_security.c +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + #include + + static int +@@ -47,6 +48,59 @@ static size_t security_list(struct inode + return len; + } + ++/* Initializes the security context for a new inode and returns the number ++ * of blocks needed for the 
transaction. If successful, reiserfs_security ++ * must be released using reiserfs_security_free when the caller is done. */ ++int reiserfs_security_init(struct inode *dir, struct inode *inode, ++ struct reiserfs_security_handle *sec) ++{ ++ int blocks = 0; ++ int error = security_inode_init_security(inode, dir, &sec->name, ++ &sec->value, &sec->length); ++ if (error) { ++ if (error == -EOPNOTSUPP) ++ error = 0; ++ ++ sec->name = NULL; ++ sec->value = NULL; ++ sec->length = 0; ++ return error; ++ } ++ ++ if (sec->length) { ++ blocks = reiserfs_xattr_jcreate_nblocks(inode) + ++ reiserfs_xattr_nblocks(inode, sec->length); ++ /* We don't want to count the directories twice if we have ++ * a default ACL. */ ++ REISERFS_I(inode)->i_flags |= i_has_xattr_dir; ++ } ++ return blocks; ++} ++ ++int reiserfs_security_write(struct reiserfs_transaction_handle *th, ++ struct inode *inode, ++ struct reiserfs_security_handle *sec) ++{ ++ int error; ++ if (strlen(sec->name) < sizeof(XATTR_SECURITY_PREFIX)) ++ return -EINVAL; ++ ++ error = reiserfs_xattr_set_handle(th, inode, sec->name, sec->value, ++ sec->length, XATTR_CREATE); ++ if (error == -ENODATA || error == -EOPNOTSUPP) ++ error = 0; ++ ++ return error; ++} ++ ++void reiserfs_security_free(struct reiserfs_security_handle *sec) ++{ ++ kfree(sec->name); ++ kfree(sec->value); ++ sec->name = NULL; ++ sec->value = NULL; ++} ++ + struct xattr_handler reiserfs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .get = security_get, +--- a/include/linux/reiserfs_fs.h ++++ b/include/linux/reiserfs_fs.h +@@ -1915,10 +1915,12 @@ void make_le_item_head(struct item_head + loff_t offset, int type, int length, int entry_count); + struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key); + ++struct reiserfs_security_handle; + int reiserfs_new_inode(struct reiserfs_transaction_handle *th, + struct inode *dir, int mode, + const char *symname, loff_t i_size, +- struct dentry *dentry, struct inode *inode); ++ 
struct dentry *dentry, struct inode *inode, ++ struct reiserfs_security_handle *security); + + void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, + struct inode *inode, loff_t size); +--- a/include/linux/reiserfs_xattr.h ++++ b/include/linux/reiserfs_xattr.h +@@ -15,6 +15,12 @@ struct reiserfs_xattr_header { + __le32 h_hash; /* hash of the value */ + }; + ++struct reiserfs_security_handle { ++ char *name; ++ void *value; ++ size_t length; ++}; ++ + #ifdef __KERNEL__ + + #include +@@ -54,6 +60,14 @@ int reiserfs_xattr_set_handle(struct rei + extern struct xattr_handler reiserfs_xattr_user_handler; + extern struct xattr_handler reiserfs_xattr_trusted_handler; + extern struct xattr_handler reiserfs_xattr_security_handler; ++#ifdef CONFIG_REISERFS_FS_SECURITY ++int reiserfs_security_init(struct inode *dir, struct inode *inode, ++ struct reiserfs_security_handle *sec); ++int reiserfs_security_write(struct reiserfs_transaction_handle *th, ++ struct inode *inode, ++ struct reiserfs_security_handle *sec); ++void reiserfs_security_free(struct reiserfs_security_handle *sec); ++#endif + + #define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header)) + static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size) +@@ -109,6 +123,24 @@ static inline void reiserfs_init_xattr_r + } + #endif /* CONFIG_REISERFS_FS_XATTR */ + ++#ifndef CONFIG_REISERFS_FS_SECURITY ++static inline int reiserfs_security_init(struct inode *dir, ++ struct inode *inode, ++ struct reiserfs_security_handle *sec) ++{ ++ return 0; ++} ++static inline int ++reiserfs_security_write(struct reiserfs_transaction_handle *th, ++ struct inode *inode, ++ struct reiserfs_security_handle *sec) ++{ ++ return 0; ++} ++static inline void reiserfs_security_free(struct reiserfs_security_handle *sec) ++{} ++#endif ++ + #endif /* __KERNEL__ */ + + #endif /* _LINUX_REISERFS_XATTR_H */ diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-simplify-buffer-info.diff 
b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-simplify-buffer-info.diff new file mode 100644 index 0000000000..42160c6bdc --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-simplify-buffer-info.diff @@ -0,0 +1,363 @@ +From: Jeff Mahoney +Subject: [PATCH 31/40] reiserfs: factor out buffer_info initialization + + This is the first in a series of patches to make balance_leaf() not quite + so insane. + + This patch factors out the open coded initializations of buffer_info + structures and defines a few initializers for the 4 cases they're used. + +Signed-off-by: Jeff Mahoney + +--- + fs/reiserfs/do_balan.c | 175 ++++++++++++++++--------------------------------- + 1 file changed, 60 insertions(+), 115 deletions(-) + +--- a/fs/reiserfs/do_balan.c ++++ b/fs/reiserfs/do_balan.c +@@ -29,6 +29,43 @@ struct tree_balance *cur_tb = NULL; /* d + is interrupting do_balance */ + #endif + ++static inline void buffer_info_init_left(struct tree_balance *tb, ++ struct buffer_info *bi) ++{ ++ bi->tb = tb; ++ bi->bi_bh = tb->L[0]; ++ bi->bi_parent = tb->FL[0]; ++ bi->bi_position = get_left_neighbor_position(tb, 0); ++} ++ ++static inline void buffer_info_init_right(struct tree_balance *tb, ++ struct buffer_info *bi) ++{ ++ bi->tb = tb; ++ bi->bi_bh = tb->R[0]; ++ bi->bi_parent = tb->FR[0]; ++ bi->bi_position = get_right_neighbor_position(tb, 0); ++} ++ ++static inline void buffer_info_init_tbS0(struct tree_balance *tb, ++ struct buffer_info *bi) ++{ ++ bi->tb = tb; ++ bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); ++ bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); ++ bi->bi_position = PATH_H_POSITION(tb->tb_path, 1); ++} ++ ++static inline void buffer_info_init_bh(struct tree_balance *tb, ++ struct buffer_info *bi, ++ struct buffer_head *bh) ++{ ++ bi->tb = tb; ++ bi->bi_bh = bh; ++ bi->bi_parent = NULL; ++ bi->bi_position = 0; ++} ++ + inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, + struct buffer_head *bh, int flag) + { +@@ -86,6 +123,7 @@ static 
int balance_leaf_when_delete(stru + "PAP-12010: tree can not be empty"); + + ih = B_N_PITEM_HEAD(tbS0, item_pos); ++ buffer_info_init_tbS0(tb, &bi); + + /* Delete or truncate the item */ + +@@ -96,10 +134,6 @@ static int balance_leaf_when_delete(stru + "vs-12013: mode Delete, insert size %d, ih to be deleted %h", + -tb->insert_size[0], ih); + +- bi.tb = tb; +- bi.bi_bh = tbS0; +- bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0); +- bi.bi_position = PATH_H_POSITION(tb->tb_path, 1); + leaf_delete_items(&bi, 0, item_pos, 1, -1); + + if (!item_pos && tb->CFL[0]) { +@@ -121,10 +155,6 @@ static int balance_leaf_when_delete(stru + break; + + case M_CUT:{ /* cut item in S[0] */ +- bi.tb = tb; +- bi.bi_bh = tbS0; +- bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0); +- bi.bi_position = PATH_H_POSITION(tb->tb_path, 1); + if (is_direntry_le_ih(ih)) { + + /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ +@@ -325,11 +355,7 @@ static int balance_leaf(struct tree_bala + ih_item_len(ih)); + + /* Insert new item into L[0] */ +- bi.tb = tb; +- bi.bi_bh = tb->L[0]; +- bi.bi_parent = tb->FL[0]; +- bi.bi_position = +- get_left_neighbor_position(tb, 0); ++ buffer_info_init_left(tb, &bi); + leaf_insert_into_buf(&bi, + n + item_pos - + ret_val, ih, body, +@@ -369,11 +395,7 @@ static int balance_leaf(struct tree_bala + leaf_shift_left(tb, tb->lnum[0] - 1, + tb->lbytes); + /* Insert new item into L[0] */ +- bi.tb = tb; +- bi.bi_bh = tb->L[0]; +- bi.bi_parent = tb->FL[0]; +- bi.bi_position = +- get_left_neighbor_position(tb, 0); ++ buffer_info_init_left(tb, &bi); + leaf_insert_into_buf(&bi, + n + item_pos - + ret_val, ih, body, +@@ -429,13 +451,7 @@ static int balance_leaf(struct tree_bala + } + + /* Append given directory entry to directory item */ +- bi.tb = tb; +- bi.bi_bh = tb->L[0]; +- bi.bi_parent = +- tb->FL[0]; +- bi.bi_position = +- get_left_neighbor_position +- (tb, 0); ++ buffer_info_init_left(tb, &bi); + leaf_paste_in_buffer + (&bi, + n + 
item_pos - +@@ -523,13 +539,7 @@ static int balance_leaf(struct tree_bala + (tbS0, + item_pos))); + /* Append to body of item in L[0] */ +- bi.tb = tb; +- bi.bi_bh = tb->L[0]; +- bi.bi_parent = +- tb->FL[0]; +- bi.bi_position = +- get_left_neighbor_position +- (tb, 0); ++ buffer_info_init_left(tb, &bi); + leaf_paste_in_buffer + (&bi, + n + item_pos - +@@ -680,11 +690,7 @@ static int balance_leaf(struct tree_bala + leaf_shift_left(tb, tb->lnum[0], + tb->lbytes); + /* Append to body of item in L[0] */ +- bi.tb = tb; +- bi.bi_bh = tb->L[0]; +- bi.bi_parent = tb->FL[0]; +- bi.bi_position = +- get_left_neighbor_position(tb, 0); ++ buffer_info_init_left(tb, &bi); + leaf_paste_in_buffer(&bi, + n + item_pos - + ret_val, +@@ -776,11 +782,7 @@ static int balance_leaf(struct tree_bala + set_le_ih_k_offset(ih, offset); + put_ih_item_len(ih, tb->rbytes); + /* Insert part of the item into R[0] */ +- bi.tb = tb; +- bi.bi_bh = tb->R[0]; +- bi.bi_parent = tb->FR[0]; +- bi.bi_position = +- get_right_neighbor_position(tb, 0); ++ buffer_info_init_right(tb, &bi); + if ((old_len - tb->rbytes) > zeros_num) { + r_zeros_number = 0; + r_body = +@@ -817,11 +819,7 @@ static int balance_leaf(struct tree_bala + tb->rnum[0] - 1, + tb->rbytes); + /* Insert new item into R[0] */ +- bi.tb = tb; +- bi.bi_bh = tb->R[0]; +- bi.bi_parent = tb->FR[0]; +- bi.bi_position = +- get_right_neighbor_position(tb, 0); ++ buffer_info_init_right(tb, &bi); + leaf_insert_into_buf(&bi, + item_pos - n + + tb->rnum[0] - 1, +@@ -881,13 +879,7 @@ static int balance_leaf(struct tree_bala + pos_in_item - + entry_count + + tb->rbytes - 1; +- bi.tb = tb; +- bi.bi_bh = tb->R[0]; +- bi.bi_parent = +- tb->FR[0]; +- bi.bi_position = +- get_right_neighbor_position +- (tb, 0); ++ buffer_info_init_right(tb, &bi); + leaf_paste_in_buffer + (&bi, 0, + paste_entry_position, +@@ -1018,12 +1010,7 @@ static int balance_leaf(struct tree_bala + (tb, tb->CFR[0], 0); + + /* Append part of body into R[0] */ +- bi.tb = tb; +- bi.bi_bh = 
tb->R[0]; +- bi.bi_parent = tb->FR[0]; +- bi.bi_position = +- get_right_neighbor_position +- (tb, 0); ++ buffer_info_init_right(tb, &bi); + if (n_rem > zeros_num) { + r_zeros_number = 0; + r_body = +@@ -1070,12 +1057,7 @@ static int balance_leaf(struct tree_bala + tb->rbytes); + /* append item in R[0] */ + if (pos_in_item >= 0) { +- bi.tb = tb; +- bi.bi_bh = tb->R[0]; +- bi.bi_parent = tb->FR[0]; +- bi.bi_position = +- get_right_neighbor_position +- (tb, 0); ++ buffer_info_init_right(tb, &bi); + leaf_paste_in_buffer(&bi, + item_pos - + n + +@@ -1231,10 +1213,7 @@ static int balance_leaf(struct tree_bala + put_ih_item_len(ih, sbytes[i]); + + /* Insert part of the item into S_new[i] before 0-th item */ +- bi.tb = tb; +- bi.bi_bh = S_new[i]; +- bi.bi_parent = NULL; +- bi.bi_position = 0; ++ buffer_info_init_bh(tb, &bi, S_new[i]); + + if ((old_len - sbytes[i]) > zeros_num) { + r_zeros_number = 0; +@@ -1266,10 +1245,7 @@ static int balance_leaf(struct tree_bala + S_new[i]); + + /* Insert new item into S_new[i] */ +- bi.tb = tb; +- bi.bi_bh = S_new[i]; +- bi.bi_parent = NULL; +- bi.bi_position = 0; ++ buffer_info_init_bh(tb, &bi, S_new[i]); + leaf_insert_into_buf(&bi, + item_pos - n + + snum[i] - 1, ih, +@@ -1326,10 +1302,7 @@ static int balance_leaf(struct tree_bala + sbytes[i] - 1, + S_new[i]); + /* Paste given directory entry to directory item */ +- bi.tb = tb; +- bi.bi_bh = S_new[i]; +- bi.bi_parent = NULL; +- bi.bi_position = 0; ++ buffer_info_init_bh(tb, &bi, S_new[i]); + leaf_paste_in_buffer + (&bi, 0, + pos_in_item - +@@ -1399,11 +1372,7 @@ static int balance_leaf(struct tree_bala + if (n_rem < 0) + n_rem = 0; + /* Append part of body into S_new[0] */ +- bi.tb = tb; +- bi.bi_bh = S_new[i]; +- bi.bi_parent = NULL; +- bi.bi_position = 0; +- ++ buffer_info_init_bh(tb, &bi, S_new[i]); + if (n_rem > zeros_num) { + r_zeros_number = 0; + r_body = +@@ -1490,10 +1459,7 @@ static int balance_leaf(struct tree_bala + leaf_mi); + + /* paste into item */ +- bi.tb = tb; +- 
bi.bi_bh = S_new[i]; +- bi.bi_parent = NULL; +- bi.bi_position = 0; ++ buffer_info_init_bh(tb, &bi, S_new[i]); + leaf_paste_in_buffer(&bi, + item_pos - n + + snum[i], +@@ -1560,10 +1526,7 @@ static int balance_leaf(struct tree_bala + + switch (flag) { + case M_INSERT: /* insert item into S[0] */ +- bi.tb = tb; +- bi.bi_bh = tbS0; +- bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0); +- bi.bi_position = PATH_H_POSITION(tb->tb_path, 1); ++ buffer_info_init_tbS0(tb, &bi); + leaf_insert_into_buf(&bi, item_pos, ih, body, + zeros_num); + +@@ -1590,14 +1553,7 @@ static int balance_leaf(struct tree_bala + "PAP-12260: insert_size is 0 already"); + + /* prepare space */ +- bi.tb = tb; +- bi.bi_bh = tbS0; +- bi.bi_parent = +- PATH_H_PPARENT(tb->tb_path, +- 0); +- bi.bi_position = +- PATH_H_POSITION(tb->tb_path, +- 1); ++ buffer_info_init_tbS0(tb, &bi); + leaf_paste_in_buffer(&bi, + item_pos, + pos_in_item, +@@ -1645,14 +1601,7 @@ static int balance_leaf(struct tree_bala + RFALSE(tb->insert_size[0] <= 0, + "PAP-12275: insert size must not be %d", + tb->insert_size[0]); +- bi.tb = tb; +- bi.bi_bh = tbS0; +- bi.bi_parent = +- PATH_H_PPARENT(tb->tb_path, +- 0); +- bi.bi_position = +- PATH_H_POSITION(tb->tb_path, +- 1); ++ buffer_info_init_tbS0(tb, &bi); + leaf_paste_in_buffer(&bi, + item_pos, + pos_in_item, +@@ -1725,7 +1674,6 @@ void make_empty_node(struct buffer_info + struct buffer_head *get_FEB(struct tree_balance *tb) + { + int i; +- struct buffer_head *first_b; + struct buffer_info bi; + + for (i = 0; i < MAX_FEB_SIZE; i++) +@@ -1735,16 +1683,13 @@ struct buffer_head *get_FEB(struct tree_ + if (i == MAX_FEB_SIZE) + reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty"); + +- bi.tb = tb; +- bi.bi_bh = first_b = tb->FEB[i]; +- bi.bi_parent = NULL; +- bi.bi_position = 0; ++ buffer_info_init_bh(tb, &bi, tb->FEB[i]); + make_empty_node(&bi); +- set_buffer_uptodate(first_b); ++ set_buffer_uptodate(tb->FEB[i]); ++ tb->used[i] = tb->FEB[i]; + tb->FEB[i] = NULL; +- tb->used[i] = 
first_b; + +- return (first_b); ++ return tb->used[i]; + } + + /* This is now used because reiserfs_free_block has to be able to diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-simplify-xattr-internal-file-lookups-opens.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-simplify-xattr-internal-file-lookups-opens.diff new file mode 100644 index 0000000000..77e815fc43 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-simplify-xattr-internal-file-lookups-opens.diff @@ -0,0 +1,474 @@ +From: Jeff Mahoney +Subject: reiserfs: simplify xattr internal file lookups/opens + + The xattr file open/lookup code is needlessly complex. We can use vfs-level + operations to perform the same work, and also simplify the locking + constraints. The locking advantages will be exploited in future patches. + +Signed-off-by: Jeff Mahoney + +--- + + fs/reiserfs/xattr.c | 262 ++++++++++++++++++++++++++-------------------------- + 1 file changed, 135 insertions(+), 127 deletions(-) + +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -44,100 +44,123 @@ + #include + #include + #include ++#include + +-#define FL_READONLY 128 +-#define FL_DIR_SEM_HELD 256 + #define PRIVROOT_NAME ".reiserfs_priv" + #define XAROOT_NAME "xattrs" + +-/* Returns the dentry referring to the root of the extended attribute +- * directory tree. If it has already been retrieved, it is used. If it +- * hasn't been created and the flags indicate creation is allowed, we +- * attempt to create it. On error, we return a pointer-encoded error. +- */ +-static struct dentry *get_xa_root(struct super_block *sb, int flags) ++/* Helpers for inode ops. We do this so that we don't have all the VFS ++ * overhead and also for proper i_mutex annotation. ++ * dir->i_mutex must be held for all of them. 
*/ ++static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) + { +- struct dentry *privroot = dget(REISERFS_SB(sb)->priv_root); +- struct dentry *xaroot; ++ BUG_ON(!mutex_is_locked(&dir->i_mutex)); ++ DQUOT_INIT(dir); ++ return dir->i_op->create(dir, dentry, mode, NULL); ++} + +- /* This needs to be created at mount-time */ +- if (!privroot) +- return ERR_PTR(-ENODATA); ++static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode) ++{ ++ BUG_ON(!mutex_is_locked(&dir->i_mutex)); ++ DQUOT_INIT(dir); ++ return dir->i_op->mkdir(dir, dentry, mode); ++} + +- mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR); +- if (REISERFS_SB(sb)->xattr_root) { +- xaroot = dget(REISERFS_SB(sb)->xattr_root); +- goto out; +- } ++/* We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr ++ * mutation ops aren't called during rename or splace, which are the ++ * only other users of I_MUTEX_CHILD. It violates the ordering, but that's ++ * better than allocating another subclass just for this code. 
*/ ++static int xattr_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ int error; ++ BUG_ON(!mutex_is_locked(&dir->i_mutex)); ++ DQUOT_INIT(dir); + +- xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME)); +- if (IS_ERR(xaroot)) { +- goto out; +- } else if (!xaroot->d_inode) { ++ mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); ++ error = dir->i_op->unlink(dir, dentry); ++ mutex_unlock(&dentry->d_inode->i_mutex); ++ ++ if (!error) ++ d_delete(dentry); ++ return error; ++} ++ ++static int xattr_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ int error; ++ BUG_ON(!mutex_is_locked(&dir->i_mutex)); ++ DQUOT_INIT(dir); ++ ++ mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); ++ dentry_unhash(dentry); ++ error = dir->i_op->rmdir(dir, dentry); ++ if (!error) ++ dentry->d_inode->i_flags |= S_DEAD; ++ mutex_unlock(&dentry->d_inode->i_mutex); ++ if (!error) ++ d_delete(dentry); ++ dput(dentry); ++ ++ return error; ++} ++ ++ ++#define xattr_may_create(flags) (!flags || flags & XATTR_CREATE) ++ ++/* Returns and possibly creates the xattr dir. 
*/ ++static struct dentry *lookup_or_create_dir(struct dentry *parent, ++ const char *name, int flags) ++{ ++ struct dentry *dentry; ++ BUG_ON(!parent); ++ ++ dentry = lookup_one_len(name, parent, strlen(name)); ++ if (IS_ERR(dentry)) ++ return dentry; ++ else if (!dentry->d_inode) { + int err = -ENODATA; +- if (flags == 0 || flags & XATTR_CREATE) +- err = privroot->d_inode->i_op->mkdir(privroot->d_inode, +- xaroot, 0700); ++ ++ if (xattr_may_create(flags)) { ++ mutex_lock_nested(&parent->d_inode->i_mutex, ++ I_MUTEX_XATTR); ++ err = xattr_mkdir(parent->d_inode, dentry, 0700); ++ mutex_unlock(&parent->d_inode->i_mutex); ++ } ++ + if (err) { +- dput(xaroot); +- xaroot = ERR_PTR(err); +- goto out; ++ dput(dentry); ++ dentry = ERR_PTR(err); + } + } +- REISERFS_SB(sb)->xattr_root = dget(xaroot); + +- out: +- mutex_unlock(&privroot->d_inode->i_mutex); +- dput(privroot); +- return xaroot; ++ return dentry; ++} ++ ++static struct dentry *open_xa_root(struct super_block *sb, int flags) ++{ ++ struct dentry *privroot = REISERFS_SB(sb)->priv_root; ++ if (!privroot) ++ return ERR_PTR(-ENODATA); ++ return lookup_or_create_dir(privroot, XAROOT_NAME, flags); + } + +-/* Opens the directory corresponding to the inode's extended attribute store. +- * If flags allow, the tree to the directory may be created. If creation is +- * prohibited, -ENODATA is returned. 
*/ + static struct dentry *open_xa_dir(const struct inode *inode, int flags) + { + struct dentry *xaroot, *xadir; + char namebuf[17]; + +- xaroot = get_xa_root(inode->i_sb, flags); ++ xaroot = open_xa_root(inode->i_sb, flags); + if (IS_ERR(xaroot)) + return xaroot; + +- /* ok, we have xaroot open */ + snprintf(namebuf, sizeof(namebuf), "%X.%X", + le32_to_cpu(INODE_PKEY(inode)->k_objectid), + inode->i_generation); +- xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf)); +- if (IS_ERR(xadir)) { +- dput(xaroot); +- return xadir; +- } +- +- if (!xadir->d_inode) { +- int err; +- if (flags == 0 || flags & XATTR_CREATE) { +- /* Although there is nothing else trying to create this directory, +- * another directory with the same hash may be created, so we need +- * to protect against that */ +- err = +- xaroot->d_inode->i_op->mkdir(xaroot->d_inode, xadir, +- 0700); +- if (err) { +- dput(xaroot); +- dput(xadir); +- return ERR_PTR(err); +- } +- } +- if (!xadir->d_inode) { +- dput(xaroot); +- dput(xadir); +- return ERR_PTR(-ENODATA); +- } +- } + ++ xadir = lookup_or_create_dir(xaroot, namebuf, flags); + dput(xaroot); + return xadir; ++ + } + + /* +@@ -302,13 +325,11 @@ static + int xattr_readdir(struct inode *inode, filldir_t filler, void *buf) + { + int res = -ENOENT; +- mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR); + if (!IS_DEADDIR(inode)) { + lock_kernel(); + res = __xattr_readdir(inode, buf, filler); + unlock_kernel(); + } +- mutex_unlock(&inode->i_mutex); + return res; + } + +@@ -345,9 +366,7 @@ __reiserfs_xattr_del(struct dentry *xadi + return -EIO; + } + +- err = dir->i_op->unlink(dir, dentry); +- if (!err) +- d_delete(dentry); ++ err = xattr_unlink(dir, dentry); + + out_file: + dput(dentry); +@@ -381,7 +400,7 @@ int reiserfs_delete_xattrs(struct inode + return 0; + + reiserfs_read_lock_xattrs(inode->i_sb); +- dir = open_xa_dir(inode, FL_READONLY); ++ dir = open_xa_dir(inode, XATTR_REPLACE); + reiserfs_read_unlock_xattrs(inode->i_sb); + if (IS_ERR(dir)) { + 
err = PTR_ERR(dir); +@@ -391,25 +410,25 @@ int reiserfs_delete_xattrs(struct inode + return 0; + } + +- lock_kernel(); ++ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); + err = xattr_readdir(dir->d_inode, reiserfs_delete_xattrs_filler, dir); +- if (err) { +- unlock_kernel(); ++ mutex_unlock(&dir->d_inode->i_mutex); ++ if (err) + goto out_dir; +- } + + /* Leftovers besides . and .. -- that's not good. */ + if (dir->d_inode->i_nlink <= 2) { +- root = get_xa_root(inode->i_sb, XATTR_REPLACE); ++ root = open_xa_root(inode->i_sb, XATTR_REPLACE); + reiserfs_write_lock_xattrs(inode->i_sb); +- err = vfs_rmdir(root->d_inode, dir); ++ mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_XATTR); ++ err = xattr_rmdir(root->d_inode, dir); ++ mutex_unlock(&root->d_inode->i_mutex); + reiserfs_write_unlock_xattrs(inode->i_sb); + dput(root); + } else { + reiserfs_warning(inode->i_sb, "jdm-20006", + "Couldn't remove all entries in directory"); + } +- unlock_kernel(); + + out_dir: + dput(dir); +@@ -445,8 +464,11 @@ reiserfs_chown_xattrs_filler(void *buf, + return -ENODATA; + } + +- if (!S_ISDIR(xafile->d_inode->i_mode)) ++ if (!S_ISDIR(xafile->d_inode->i_mode)) { ++ mutex_lock_nested(&xafile->d_inode->i_mutex, I_MUTEX_CHILD); + err = notify_change(xafile, attrs); ++ mutex_unlock(&xafile->d_inode->i_mutex); ++ } + dput(xafile); + + return err; +@@ -464,38 +486,31 @@ int reiserfs_chown_xattrs(struct inode * + return 0; + + reiserfs_read_lock_xattrs(inode->i_sb); +- dir = open_xa_dir(inode, FL_READONLY); ++ dir = open_xa_dir(inode, XATTR_REPLACE); + reiserfs_read_unlock_xattrs(inode->i_sb); + if (IS_ERR(dir)) { + if (PTR_ERR(dir) != -ENODATA) + err = PTR_ERR(dir); + goto out; +- } else if (!dir->d_inode) { +- dput(dir); +- goto out; +- } +- +- lock_kernel(); ++ } else if (!dir->d_inode) ++ goto out_dir; + + attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME); + buf.xadir = dir; + buf.attrs = attrs; + buf.inode = inode; + ++ mutex_lock_nested(&dir->d_inode->i_mutex, 
I_MUTEX_XATTR); + err = xattr_readdir(dir->d_inode, reiserfs_chown_xattrs_filler, &buf); +- if (err) { +- unlock_kernel(); +- goto out_dir; +- } + +- err = notify_change(dir, attrs); +- unlock_kernel(); ++ if (!err) ++ err = notify_change(dir, attrs); ++ mutex_unlock(&dir->d_inode->i_mutex); + ++ attrs->ia_valid = ia_valid; + out_dir: + dput(dir); +- + out: +- attrs->ia_valid = ia_valid; + return err; + } + +@@ -513,47 +528,35 @@ static struct dentry *get_xa_file_dentry + int err = 0; + + xadir = open_xa_dir(inode, flags); +- if (IS_ERR(xadir)) { ++ if (IS_ERR(xadir)) + return ERR_CAST(xadir); +- } else if (xadir && !xadir->d_inode) { +- dput(xadir); +- return ERR_PTR(-ENODATA); +- } + + xafile = lookup_one_len(name, xadir, strlen(name)); + if (IS_ERR(xafile)) { +- dput(xadir); +- return ERR_CAST(xafile); ++ err = PTR_ERR(xafile); ++ goto out; + } + +- if (xafile->d_inode) { /* file exists */ +- if (flags & XATTR_CREATE) { +- err = -EEXIST; +- dput(xafile); +- goto out; +- } +- } else if (flags & XATTR_REPLACE || flags & FL_READONLY) { +- goto out; +- } else { +- /* inode->i_mutex is down, so nothing else can try to create +- * the same xattr */ +- err = xadir->d_inode->i_op->create(xadir->d_inode, xafile, +- 0700 | S_IFREG, NULL); ++ if (xafile->d_inode && (flags & XATTR_CREATE)) ++ err = -EEXIST; + +- if (err) { +- dput(xafile); +- goto out; ++ if (!xafile->d_inode) { ++ err = -ENODATA; ++ if (xattr_may_create(flags)) { ++ mutex_lock_nested(&xadir->d_inode->i_mutex, ++ I_MUTEX_XATTR); ++ err = xattr_create(xadir->d_inode, xafile, ++ 0700|S_IFREG); ++ mutex_unlock(&xadir->d_inode->i_mutex); + } + } + ++ if (err) ++ dput(xafile); + out: + dput(xadir); + if (err) +- xafile = ERR_PTR(err); +- else if (!xafile->d_inode) { +- dput(xafile); +- xafile = ERR_PTR(-ENODATA); +- } ++ return ERR_PTR(err); + return xafile; + } + +@@ -633,6 +636,7 @@ reiserfs_xattr_set(struct inode *inode, + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + 
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); + err = notify_change(dentry, &newattrs); ++ mutex_unlock(&dentry->d_inode->i_mutex); + if (err) + goto out_filp; + +@@ -692,7 +696,6 @@ reiserfs_xattr_set(struct inode *inode, + } + + out_filp: +- mutex_unlock(&dentry->d_inode->i_mutex); + dput(dentry); + + out: +@@ -722,7 +725,7 @@ reiserfs_xattr_get(const struct inode *i + if (get_inode_sd_version(inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + +- dentry = get_xa_file_dentry(inode, name, FL_READONLY); ++ dentry = get_xa_file_dentry(inode, name, XATTR_REPLACE); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto out; +@@ -806,13 +809,15 @@ int reiserfs_xattr_del(struct inode *ino + struct dentry *dir; + int err; + +- dir = open_xa_dir(inode, FL_READONLY); ++ dir = open_xa_dir(inode, XATTR_REPLACE); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + goto out; + } + ++ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); + err = __reiserfs_xattr_del(dir, name, strlen(name)); ++ mutex_unlock(&dir->d_inode->i_mutex); + dput(dir); + + if (!err) { +@@ -826,6 +831,7 @@ int reiserfs_xattr_del(struct inode *ino + + /* Actual operations that are exported to VFS-land */ + ++static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char *); + /* + * Inode operation getxattr() + * Preliminary locking: we down dentry->d_inode->i_mutex +@@ -978,7 +984,7 @@ ssize_t reiserfs_listxattr(struct dentry + + reiserfs_read_lock_xattr_i(dentry->d_inode); + reiserfs_read_lock_xattrs(dentry->d_sb); +- dir = open_xa_dir(dentry->d_inode, FL_READONLY); ++ dir = open_xa_dir(dentry->d_inode, XATTR_REPLACE); + reiserfs_read_unlock_xattrs(dentry->d_sb); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); +@@ -994,7 +1000,9 @@ ssize_t reiserfs_listxattr(struct dentry + + REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir; + ++ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); + err = xattr_readdir(dir->d_inode, reiserfs_listxattr_filler, &buf); ++ 
mutex_unlock(&dir->d_inode->i_mutex); + if (err) + goto out_dir; + +@@ -1146,7 +1154,7 @@ static int create_privroot(struct dentry + int err; + struct inode *inode = dentry->d_parent->d_inode; + mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR); +- err = inode->i_op->mkdir(inode, dentry, 0700); ++ err = xattr_mkdir(inode, dentry, 0700); + mutex_unlock(&inode->i_mutex); + if (err) { + dput(dentry); diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-strip-whitespace.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-strip-whitespace.diff new file mode 100644 index 0000000000..cf4f8dce7f --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-strip-whitespace.diff @@ -0,0 +1,1335 @@ +From: Jeff Mahoney +Subject: reiserfs: strip trailing whitespace + + This patch strips trailing whitespace from the reiserfs code. + +Signed-off-by: Jeff Mahoney + +--- + fs/reiserfs/README | 4 - + fs/reiserfs/do_balan.c | 14 ++-- + fs/reiserfs/file.c | 8 +- + fs/reiserfs/fix_node.c | 38 ++++++------ + fs/reiserfs/hashes.c | 2 + fs/reiserfs/ibalance.c | 10 +-- + fs/reiserfs/inode.c | 52 ++++++++--------- + fs/reiserfs/ioctl.c | 2 + fs/reiserfs/journal.c | 120 ++++++++++++++++++++--------------------- + fs/reiserfs/lbalance.c | 18 +++--- + fs/reiserfs/namei.c | 30 +++++----- + fs/reiserfs/objectid.c | 2 + fs/reiserfs/prints.c | 26 ++++---- + fs/reiserfs/procfs.c | 2 + fs/reiserfs/resize.c | 6 +- + fs/reiserfs/stree.c | 8 +- + fs/reiserfs/super.c | 10 +-- + fs/reiserfs/tail_conversion.c | 2 + include/linux/reiserfs_fs_sb.h | 14 ++-- + 19 files changed, 184 insertions(+), 184 deletions(-) + +--- a/fs/reiserfs/do_balan.c ++++ b/fs/reiserfs/do_balan.c +@@ -76,21 +76,21 @@ inline void do_balance_mark_leaf_dirty(s + #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty + #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty + +-/* summary: ++/* summary: + if deleting something ( tb->insert_size[0] < 0 ) + return(balance_leaf_when_delete()); (flag d 
handled here) + else + if lnum is larger than 0 we put items into the left node + if rnum is larger than 0 we put items into the right node + if snum1 is larger than 0 we put items into the new node s1 +- if snum2 is larger than 0 we put items into the new node s2 ++ if snum2 is larger than 0 we put items into the new node s2 + Note that all *num* count new items being created. + + It would be easier to read balance_leaf() if each of these summary + lines was a separate procedure rather than being inlined. I think + that there are many passages here and in balance_leaf_when_delete() in + which two calls to one procedure can replace two passages, and it +-might save cache space and improve software maintenance costs to do so. ++might save cache space and improve software maintenance costs to do so. + + Vladimir made the perceptive comment that we should offload most of + the decision making in this function into fix_nodes/check_balance, and +@@ -288,15 +288,15 @@ static int balance_leaf(struct tree_bala + ) + { + struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); +- int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0] ++ int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0] + of the affected item */ + struct buffer_info bi; + struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ + int snum[2]; /* number of items that will be placed + into S_new (includes partially shifted + items) */ +- int sbytes[2]; /* if an item is partially shifted into S_new then +- if it is a directory item ++ int sbytes[2]; /* if an item is partially shifted into S_new then ++ if it is a directory item + it is the number of entries from the item that are shifted into S_new + else + it is the number of bytes from the item that are shifted into S_new +@@ -1983,7 +1983,7 @@ static inline void do_balance_starts(str + /* store_print_tb (tb); */ + + /* do not delete, just 
comment it out */ +-/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, ++/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, + "check");*/ + RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); + #ifdef CONFIG_REISERFS_CHECK +--- a/fs/reiserfs/file.c ++++ b/fs/reiserfs/file.c +@@ -20,14 +20,14 @@ + ** insertion/balancing, for files that are written in one write. + ** It avoids unnecessary tail packings (balances) for files that are written in + ** multiple writes and are small enough to have tails. +-** ++** + ** file_release is called by the VFS layer when the file is closed. If + ** this is the last open file descriptor, and the file + ** small enough to have a tail, and the tail is currently in an + ** unformatted node, the tail is converted back into a direct item. +-** ++** + ** We use reiserfs_truncate_file to pack the tail, since it already has +-** all the conditions coded. ++** all the conditions coded. + */ + static int reiserfs_file_release(struct inode *inode, struct file *filp) + { +@@ -223,7 +223,7 @@ int reiserfs_commit_page(struct inode *i + } + + /* Write @count bytes at position @ppos in a file indicated by @file +- from the buffer @buf. ++ from the buffer @buf. + + generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want + something simple that works. 
It is not for serious use by general purpose filesystems, excepting the one that it was +--- a/fs/reiserfs/fix_node.c ++++ b/fs/reiserfs/fix_node.c +@@ -30,8 +30,8 @@ + ** get_direct_parent + ** get_neighbors + ** fix_nodes +- ** +- ** ++ ** ++ ** + **/ + + #include +@@ -377,9 +377,9 @@ static int get_num_ver(int mode, struct + int needed_nodes; + int start_item, /* position of item we start filling node from */ + end_item, /* position of item we finish filling node by */ +- start_bytes, /* number of first bytes (entries for directory) of start_item-th item ++ start_bytes, /* number of first bytes (entries for directory) of start_item-th item + we do not include into node that is being filled */ +- end_bytes; /* number of last bytes (entries for directory) of end_item-th item ++ end_bytes; /* number of last bytes (entries for directory) of end_item-th item + we do node include into node that is being filled */ + int split_item_positions[2]; /* these are positions in virtual item of + items, that are split between S[0] and +@@ -569,7 +569,7 @@ extern struct tree_balance *cur_tb; + + /* Set parameters for balancing. + * Performs write of results of analysis of balancing into structure tb, +- * where it will later be used by the functions that actually do the balancing. ++ * where it will later be used by the functions that actually do the balancing. + * Parameters: + * tb tree_balance structure; + * h current level of the node; +@@ -1204,7 +1204,7 @@ static inline int can_node_be_removed(in + * h current level of the node; + * inum item number in S[h]; + * mode i - insert, p - paste; +- * Returns: 1 - schedule occurred; ++ * Returns: 1 - schedule occurred; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. +@@ -1239,7 +1239,7 @@ static int ip_check_balance(struct tree_ + /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters. 
+ where 4th parameter is s1bytes and 5th - s2bytes + */ +- short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases ++ short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases + 0,1 - do not shift and do not shift but bottle + 2 - shift only whole item to left + 3 - shift to left and bottle as much as possible +@@ -1288,7 +1288,7 @@ static int ip_check_balance(struct tree_ + + create_virtual_node(tb, h); + +- /* ++ /* + determine maximal number of items we can shift to the left neighbor (in tb structure) + and the maximal number of bytes that can flow to the left neighbor + from the left most liquid item that cannot be shifted from S[0] entirely (returned value) +@@ -1349,13 +1349,13 @@ static int ip_check_balance(struct tree_ + + { + int lpar, rpar, nset, lset, rset, lrset; +- /* ++ /* + * regular overflowing of the node + */ + +- /* get_num_ver works in 2 modes (FLOW & NO_FLOW) ++ /* get_num_ver works in 2 modes (FLOW & NO_FLOW) + lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) +- nset, lset, rset, lrset - shows, whether flowing items give better packing ++ nset, lset, rset, lrset - shows, whether flowing items give better packing + */ + #define FLOW 1 + #define NO_FLOW 0 /* do not any splitting */ +@@ -1545,7 +1545,7 @@ static int ip_check_balance(struct tree_ + * h current level of the node; + * inum item number in S[h]; + * mode i - insert, p - paste; +- * Returns: 1 - schedule occurred; ++ * Returns: 1 - schedule occurred; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. +@@ -1728,7 +1728,7 @@ static int dc_check_balance_internal(str + * h current level of the node; + * inum item number in S[h]; + * mode i - insert, p - paste; +- * Returns: 1 - schedule occurred; ++ * Returns: 1 - schedule occurred; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. 
+@@ -1822,7 +1822,7 @@ static int dc_check_balance_leaf(struct + * h current level of the node; + * inum item number in S[h]; + * mode d - delete, c - cut. +- * Returns: 1 - schedule occurred; ++ * Returns: 1 - schedule occurred; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. +@@ -1851,7 +1851,7 @@ static int dc_check_balance(struct tree_ + * h current level of the node; + * inum item number in S[h]; + * mode i - insert, p - paste, d - delete, c - cut. +- * Returns: 1 - schedule occurred; ++ * Returns: 1 - schedule occurred; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. +@@ -2296,15 +2296,15 @@ static int wait_tb_buffers_until_unlocke + * analyze what and where should be moved; + * get sufficient number of new nodes; + * Balancing will start only after all resources will be collected at a time. +- * ++ * + * When ported to SMP kernels, only at the last moment after all needed nodes + * are collected in cache, will the resources be locked using the usual + * textbook ordered lock acquisition algorithms. Note that ensuring that + * this code neither write locks what it does not need to write lock nor locks out of order + * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans +- * ++ * + * fix is meant in the sense of render unchanging +- * ++ * + * Latency might be improved by first gathering a list of what buffers are needed + * and then getting as many of them in parallel as possible? 
-Hans + * +@@ -2316,7 +2316,7 @@ static int wait_tb_buffers_until_unlocke + * ins_ih & ins_sd are used when inserting + * Returns: 1 - schedule occurred while the function worked; + * 0 - schedule didn't occur while the function worked; +- * -1 - if no_disk_space ++ * -1 - if no_disk_space + */ + + int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_ins_ih, // item head of item being inserted +--- a/fs/reiserfs/hashes.c ++++ b/fs/reiserfs/hashes.c +@@ -7,7 +7,7 @@ + * (see Applied Cryptography, 2nd edition, p448). + * + * Jeremy Fitzhardinge 1998 +- * ++ * + * Jeremy has agreed to the contents of reiserfs/README. -Hans + * Yura's function is added (04/07/2000) + */ +--- a/fs/reiserfs/ibalance.c ++++ b/fs/reiserfs/ibalance.c +@@ -278,7 +278,7 @@ static void internal_delete_childs(struc + + /* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest + * last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest +- * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest ++ * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest + */ + static void internal_copy_pointers_items(struct buffer_info *dest_bi, + struct buffer_head *src, +@@ -385,7 +385,7 @@ static void internal_move_pointers_items + if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ + first_pointer = 0; + first_item = 0; +- /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, ++ /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, + for key - with first_item */ + internal_delete_pointers_items(src_bi, first_pointer, + first_item, cpy_num - del_par); +@@ -453,7 +453,7 @@ static void internal_insert_key(struct b + } + } + +-/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. ++/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. 
+ * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. + * Replace d_key'th key in buffer cfl. + * Delete pointer_amount items and node pointers from buffer src. +@@ -518,7 +518,7 @@ static void internal_shift1_left(struct + /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */ + } + +-/* Insert d_key'th (delimiting) key from buffer cfr to head of dest. ++/* Insert d_key'th (delimiting) key from buffer cfr to head of dest. + * Copy n node pointers and n - 1 items from buffer src to buffer dest. + * Replace d_key'th key in buffer cfr. + * Delete n items and node pointers from buffer src. +@@ -749,7 +749,7 @@ int balance_internal(struct tree_balance + this means that new pointers and items must be inserted AFTER * + child_pos + } +- else ++ else + { + it is the position of the leftmost pointer that must be deleted (together with + its corresponding key to the left of the pointer) +--- a/fs/reiserfs/inode.c ++++ b/fs/reiserfs/inode.c +@@ -52,7 +52,7 @@ void reiserfs_delete_inode(struct inode + /* Do quota update inside a transaction for journaled quotas. We must do that + * after delete_object so that quota updates go into the same transaction as + * stat data deletion */ +- if (!err) ++ if (!err) + DQUOT_FREE_INODE(inode); + + if (journal_end(&th, inode->i_sb, jbegin_count)) +@@ -363,7 +363,7 @@ static int _get_block_create_0(struct in + } + /* make sure we don't read more bytes than actually exist in + ** the file. This can happen in odd cases where i_size isn't +- ** correct, and when direct item padding results in a few ++ ** correct, and when direct item padding results in a few + ** extra bytes at the end of the direct item + */ + if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) +@@ -438,15 +438,15 @@ static int reiserfs_bmap(struct inode *i + ** -ENOENT instead of a valid buffer. 
block_prepare_write expects to + ** be able to do i/o on the buffers returned, unless an error value + ** is also returned. +-** ++** + ** So, this allows block_prepare_write to be used for reading a single block + ** in a page. Where it does not produce a valid page for holes, or past the + ** end of the file. This turns out to be exactly what we need for reading + ** tails for conversion. + ** + ** The point of the wrapper is forcing a certain value for create, even +-** though the VFS layer is calling this function with create==1. If you +-** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, ++** though the VFS layer is calling this function with create==1. If you ++** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, + ** don't use this function. + */ + static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, +@@ -602,7 +602,7 @@ int reiserfs_get_block(struct inode *ino + int done; + int fs_gen; + struct reiserfs_transaction_handle *th = NULL; +- /* space reserved in transaction batch: ++ /* space reserved in transaction batch: + . 3 balancings in direct->indirect conversion + . 1 block involved into reiserfs_update_sd() + XXX in practically impossible worst case direct2indirect() +@@ -754,7 +754,7 @@ int reiserfs_get_block(struct inode *ino + reiserfs_write_unlock(inode->i_sb); + + /* the item was found, so new blocks were not added to the file +- ** there is no need to make sure the inode is updated with this ++ ** there is no need to make sure the inode is updated with this + ** transaction + */ + return retval; +@@ -986,7 +986,7 @@ int reiserfs_get_block(struct inode *ino + + /* this loop could log more blocks than we had originally asked + ** for. So, we have to allow the transaction to end if it is +- ** too big or too full. Update the inode so things are ++ ** too big or too full. 
Update the inode so things are + ** consistent if we crash before the function returns + ** + ** release the path so that anybody waiting on the path before +@@ -997,7 +997,7 @@ int reiserfs_get_block(struct inode *ino + if (retval) + goto failure; + } +- /* inserting indirect pointers for a hole can take a ++ /* inserting indirect pointers for a hole can take a + ** long time. reschedule if needed + */ + cond_resched(); +@@ -1444,7 +1444,7 @@ void reiserfs_read_locked_inode(struct i + update sd on unlink all that is required is to check for nlink + here. This bug was first found by Sizif when debugging + SquidNG/Butterfly, forgotten, and found again after Philippe +- Gramoulle reproduced it. ++ Gramoulle reproduced it. + + More logical fix would require changes in fs/inode.c:iput() to + remove inode from hash-table _after_ fs cleaned disk stuff up and +@@ -1628,7 +1628,7 @@ int reiserfs_write_inode(struct inode *i + if (inode->i_sb->s_flags & MS_RDONLY) + return -EROFS; + /* memory pressure can sometimes initiate write_inode calls with sync == 1, +- ** these cases are just when the system needs ram, not when the ++ ** these cases are just when the system needs ram, not when the + ** inode needs to reach disk for safety, and they can safely be + ** ignored because the altered inode has already been logged. + */ +@@ -1745,7 +1745,7 @@ static int reiserfs_new_symlink(struct r + /* inserts the stat data into the tree, and then calls + reiserfs_new_directory (to insert ".", ".." item if new object is + directory) or reiserfs_new_symlink (to insert symlink body if new +- object is symlink) or nothing (if new object is regular file) ++ object is symlink) or nothing (if new object is regular file) + + NOTE! uid and gid must already be set in the inode. If we return + non-zero due to an error, we have to drop the quota previously allocated +@@ -1753,7 +1753,7 @@ static int reiserfs_new_symlink(struct r + if we return non-zero, we also end the transaction. 
*/ + int reiserfs_new_inode(struct reiserfs_transaction_handle *th, + struct inode *dir, int mode, const char *symname, +- /* 0 for regular, EMTRY_DIR_SIZE for dirs, ++ /* 0 for regular, EMTRY_DIR_SIZE for dirs, + strlen (symname) for symlinks) */ + loff_t i_size, struct dentry *dentry, + struct inode *inode, +@@ -1788,7 +1788,7 @@ int reiserfs_new_inode(struct reiserfs_t + goto out_bad_inode; + } + if (old_format_only(sb)) +- /* not a perfect generation count, as object ids can be reused, but ++ /* not a perfect generation count, as object ids can be reused, but + ** this is as good as reiserfs can do right now. + ** note that the private part of inode isn't filled in yet, we have + ** to use the directory. +@@ -2086,7 +2086,7 @@ int reiserfs_truncate_file(struct inode + + if (p_s_inode->i_size > 0) { + if ((error = grab_tail_page(p_s_inode, &page, &bh))) { +- // -ENOENT means we truncated past the end of the file, ++ // -ENOENT means we truncated past the end of the file, + // and get_block_create_0 could not find a block to read in, + // which is ok. + if (error != -ENOENT) +@@ -2098,11 +2098,11 @@ int reiserfs_truncate_file(struct inode + } + } + +- /* so, if page != NULL, we have a buffer head for the offset at +- ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, +- ** then we have an unformatted node. Otherwise, we have a direct item, +- ** and no zeroing is required on disk. We zero after the truncate, +- ** because the truncate might pack the item anyway ++ /* so, if page != NULL, we have a buffer head for the offset at ++ ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, ++ ** then we have an unformatted node. Otherwise, we have a direct item, ++ ** and no zeroing is required on disk. We zero after the truncate, ++ ** because the truncate might pack the item anyway + ** (it will unmap bh if it packs). 
+ */ + /* it is enough to reserve space in transaction for 2 balancings: +@@ -2311,8 +2311,8 @@ static int map_block_for_writepage(struc + return retval; + } + +-/* +- * mason@suse.com: updated in 2.5.54 to follow the same general io ++/* ++ * mason@suse.com: updated in 2.5.54 to follow the same general io + * start/recovery path as __block_write_full_page, along with special + * code to handle reiserfs tails. + */ +@@ -2452,7 +2452,7 @@ static int reiserfs_write_full_page(stru + unlock_page(page); + + /* +- * since any buffer might be the only dirty buffer on the page, ++ * since any buffer might be the only dirty buffer on the page, + * the first submit_bh can bring the page out of writeback. + * be careful with the buffers. + */ +@@ -2471,8 +2471,8 @@ static int reiserfs_write_full_page(stru + if (nr == 0) { + /* + * if this page only had a direct item, it is very possible for +- * no io to be required without there being an error. Or, +- * someone else could have locked them and sent them down the ++ * no io to be required without there being an error. Or, ++ * someone else could have locked them and sent them down the + * pipe without locking the page + */ + bh = head; +@@ -2491,7 +2491,7 @@ static int reiserfs_write_full_page(stru + + fail: + /* catches various errors, we need to make sure any valid dirty blocks +- * get to the media. The page is currently locked and not marked for ++ * get to the media. The page is currently locked and not marked for + * writeback + */ + ClearPageUptodate(page); +--- a/fs/reiserfs/ioctl.c ++++ b/fs/reiserfs/ioctl.c +@@ -189,7 +189,7 @@ int reiserfs_unpack(struct inode *inode, + } + + /* we unpack by finding the page with the tail, and calling +- ** reiserfs_prepare_write on that page. This will force a ++ ** reiserfs_prepare_write on that page. This will force a + ** reiserfs_get_block to unpack the tail for us. 
+ */ + index = inode->i_size >> PAGE_CACHE_SHIFT; +--- a/fs/reiserfs/journal.c ++++ b/fs/reiserfs/journal.c +@@ -1,36 +1,36 @@ + /* + ** Write ahead logging implementation copyright Chris Mason 2000 + ** +-** The background commits make this code very interelated, and ++** The background commits make this code very interelated, and + ** overly complex. I need to rethink things a bit....The major players: + ** +-** journal_begin -- call with the number of blocks you expect to log. ++** journal_begin -- call with the number of blocks you expect to log. + ** If the current transaction is too +-** old, it will block until the current transaction is ++** old, it will block until the current transaction is + ** finished, and then start a new one. +-** Usually, your transaction will get joined in with ++** Usually, your transaction will get joined in with + ** previous ones for speed. + ** +-** journal_join -- same as journal_begin, but won't block on the current ++** journal_join -- same as journal_begin, but won't block on the current + ** transaction regardless of age. Don't ever call +-** this. Ever. There are only two places it should be ++** this. Ever. There are only two places it should be + ** called from, and they are both inside this file. + ** +-** journal_mark_dirty -- adds blocks into this transaction. clears any flags ++** journal_mark_dirty -- adds blocks into this transaction. clears any flags + ** that might make them get sent to disk +-** and then marks them BH_JDirty. Puts the buffer head +-** into the current transaction hash. ++** and then marks them BH_JDirty. Puts the buffer head ++** into the current transaction hash. + ** + ** journal_end -- if the current transaction is batchable, it does nothing + ** otherwise, it could do an async/synchronous commit, or +-** a full flush of all log and real blocks in the ++** a full flush of all log and real blocks in the + ** transaction. 
+ ** +-** flush_old_commits -- if the current transaction is too old, it is ended and +-** commit blocks are sent to disk. Forces commit blocks +-** to disk for all backgrounded commits that have been ++** flush_old_commits -- if the current transaction is too old, it is ended and ++** commit blocks are sent to disk. Forces commit blocks ++** to disk for all backgrounded commits that have been + ** around too long. +-** -- Note, if you call this as an immediate flush from ++** -- Note, if you call this as an immediate flush from + ** from within kupdate, it will ignore the immediate flag + */ + +@@ -212,7 +212,7 @@ static void allocate_bitmap_nodes(struct + list_add(&bn->list, &journal->j_bitmap_nodes); + journal->j_free_bitmap_nodes++; + } else { +- break; // this is ok, we'll try again when more are needed ++ break; /* this is ok, we'll try again when more are needed */ + } + } + } +@@ -283,7 +283,7 @@ static int free_bitmap_nodes(struct supe + } + + /* +-** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. ++** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. + ** jb_array is the array to be filled in. + */ + int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, +@@ -315,7 +315,7 @@ int reiserfs_allocate_list_bitmaps(struc + } + + /* +-** find an available list bitmap. If you can't find one, flush a commit list ++** find an available list bitmap. If you can't find one, flush a commit list + ** and try again + */ + static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb, +@@ -348,7 +348,7 @@ static struct reiserfs_list_bitmap *get_ + return jb; + } + +-/* ++/* + ** allocates a new chunk of X nodes, and links them all together as a list. 
+ ** Uses the cnode->next and cnode->prev pointers + ** returns NULL on failure +@@ -376,7 +376,7 @@ static struct reiserfs_journal_cnode *al + } + + /* +-** pulls a cnode off the free list, or returns NULL on failure ++** pulls a cnode off the free list, or returns NULL on failure + */ + static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) + { +@@ -403,7 +403,7 @@ static struct reiserfs_journal_cnode *ge + } + + /* +-** returns a cnode to the free list ++** returns a cnode to the free list + */ + static void free_cnode(struct super_block *p_s_sb, + struct reiserfs_journal_cnode *cn) +@@ -1192,8 +1192,8 @@ static int flush_commit_list(struct supe + } + + /* +-** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or +-** returns NULL if it can't find anything ++** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or ++** returns NULL if it can't find anything + */ + static struct reiserfs_journal_list *find_newer_jl_for_cn(struct + reiserfs_journal_cnode +@@ -1335,8 +1335,8 @@ static int update_journal_header_block(s + return _update_journal_header_block(p_s_sb, offset, trans_id); + } + +-/* +-** flush any and all journal lists older than you are ++/* ++** flush any and all journal lists older than you are + ** can only be called from flush_journal_list + */ + static int flush_older_journal_lists(struct super_block *p_s_sb, +@@ -1382,8 +1382,8 @@ static void del_from_work_list(struct su + ** always set flushall to 1, unless you are calling from inside + ** flush_journal_list + ** +-** IMPORTANT. This can only be called while there are no journal writers, +-** and the journal is locked. That means it can only be called from ++** IMPORTANT. This can only be called while there are no journal writers, ++** and the journal is locked. 
That means it can only be called from + ** do_journal_end, or by journal_release + */ + static int flush_journal_list(struct super_block *s, +@@ -1429,7 +1429,7 @@ static int flush_journal_list(struct sup + goto flush_older_and_return; + } + +- /* start by putting the commit list on disk. This will also flush ++ /* start by putting the commit list on disk. This will also flush + ** the commit lists of any olders transactions + */ + flush_commit_list(s, jl, 1); +@@ -1444,8 +1444,8 @@ static int flush_journal_list(struct sup + goto flush_older_and_return; + } + +- /* loop through each cnode, see if we need to write it, +- ** or wait on a more recent transaction, or just ignore it ++ /* loop through each cnode, see if we need to write it, ++ ** or wait on a more recent transaction, or just ignore it + */ + if (atomic_read(&(journal->j_wcount)) != 0) { + reiserfs_panic(s, "journal-844", "journal list is flushing, " +@@ -1473,8 +1473,8 @@ static int flush_journal_list(struct sup + if (!pjl && cn->bh) { + saved_bh = cn->bh; + +- /* we do this to make sure nobody releases the buffer while +- ** we are working with it ++ /* we do this to make sure nobody releases the buffer while ++ ** we are working with it + */ + get_bh(saved_bh); + +@@ -1497,8 +1497,8 @@ static int flush_journal_list(struct sup + goto free_cnode; + } + +- /* bh == NULL when the block got to disk on its own, OR, +- ** the block got freed in a future transaction ++ /* bh == NULL when the block got to disk on its own, OR, ++ ** the block got freed in a future transaction + */ + if (saved_bh == NULL) { + goto free_cnode; +@@ -1586,7 +1586,7 @@ static int flush_journal_list(struct sup + __func__); + flush_older_and_return: + +- /* before we can update the journal header block, we _must_ flush all ++ /* before we can update the journal header block, we _must_ flush all + ** real blocks from all older transactions to disk. 
This is because + ** once the header block is updated, this transaction will not be + ** replayed after a crash +@@ -1596,7 +1596,7 @@ static int flush_journal_list(struct sup + } + + err = journal->j_errno; +- /* before we can remove everything from the hash tables for this ++ /* before we can remove everything from the hash tables for this + ** transaction, we must make sure it can never be replayed + ** + ** since we are only called from do_journal_end, we know for sure there +@@ -2016,9 +2016,9 @@ static int journal_compare_desc_commit(s + return 0; + } + +-/* returns 0 if it did not find a description block ++/* returns 0 if it did not find a description block + ** returns -1 if it found a corrupt commit block +-** returns 1 if both desc and commit were valid ++** returns 1 if both desc and commit were valid + */ + static int journal_transaction_is_valid(struct super_block *p_s_sb, + struct buffer_head *d_bh, +@@ -2380,8 +2380,8 @@ static int journal_read(struct super_blo + bdevname(journal->j_dev_bd, b)); + start = get_seconds(); + +- /* step 1, read in the journal header block. Check the transaction it says +- ** is the first unflushed, and if that transaction is not valid, ++ /* step 1, read in the journal header block. Check the transaction it says ++ ** is the first unflushed, and if that transaction is not valid, + ** replay is done + */ + journal->j_header_bh = journal_bread(p_s_sb, +@@ -2406,8 +2406,8 @@ static int journal_read(struct super_blo + le32_to_cpu(jh->j_last_flush_trans_id)); + valid_journal_header = 1; + +- /* now, we try to read the first unflushed offset. If it is not valid, +- ** there is nothing more we can do, and it makes no sense to read ++ /* now, we try to read the first unflushed offset. If it is not valid, ++ ** there is nothing more we can do, and it makes no sense to read + ** through the whole log. 
+ */ + d_bh = +@@ -2916,7 +2916,7 @@ int journal_transaction_should_end(struc + return 0; + } + +-/* this must be called inside a transaction, and requires the ++/* this must be called inside a transaction, and requires the + ** kernel_lock to be held + */ + void reiserfs_block_writes(struct reiserfs_transaction_handle *th) +@@ -3037,7 +3037,7 @@ static int do_journal_begin_r(struct rei + now = get_seconds(); + + /* if there is no room in the journal OR +- ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning ++ ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning + ** we don't sleep if there aren't other writers + */ + +@@ -3237,7 +3237,7 @@ int journal_begin(struct reiserfs_transa + ** + ** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the + ** transaction is committed. +-** ++** + ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. + */ + int journal_mark_dirty(struct reiserfs_transaction_handle *th, +@@ -3287,7 +3287,7 @@ int journal_mark_dirty(struct reiserfs_t + atomic_read(&(journal->j_wcount))); + return 1; + } +- /* this error means I've screwed up, and we've overflowed the transaction. ++ /* this error means I've screwed up, and we've overflowed the transaction. + ** Nothing can be done here, except make the FS readonly or panic. + */ + if (journal->j_len >= journal->j_trans_max) { +@@ -3377,7 +3377,7 @@ int journal_end(struct reiserfs_transact + } + } + +-/* removes from the current transaction, relsing and descrementing any counters. ++/* removes from the current transaction, relsing and descrementing any counters. 
+ ** also files the removed buffer directly onto the clean list + ** + ** called by journal_mark_freed when a block has been deleted +@@ -3475,7 +3475,7 @@ static int can_dirty(struct reiserfs_jou + } + + /* syncs the commit blocks, but does not force the real buffers to disk +-** will wait until the current transaction is done/committed before returning ++** will wait until the current transaction is done/committed before returning + */ + int journal_end_sync(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb, unsigned long nblocks) +@@ -3557,13 +3557,13 @@ int reiserfs_flush_old_commits(struct su + + /* + ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit +-** +-** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all ++** ++** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all + ** the writers are done. By the time it wakes up, the transaction it was called has already ended, so it just + ** flushes the commit list and returns 0. + ** + ** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. +-** ++** + ** Note, we can't allow the journal_end to proceed while there are still writers in the log. 
+ */ + static int check_journal_end(struct reiserfs_transaction_handle *th, +@@ -3591,7 +3591,7 @@ static int check_journal_end(struct reis + atomic_dec(&(journal->j_wcount)); + } + +- /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released ++ /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released + ** will be dealt with by next transaction that actually writes something, but should be taken + ** care of in this trans + */ +@@ -3600,7 +3600,7 @@ static int check_journal_end(struct reis + /* if wcount > 0, and we are called to with flush or commit_now, + ** we wait on j_join_wait. We will wake up when the last writer has + ** finished the transaction, and started it on its way to the disk. +- ** Then, we flush the commit or journal list, and just return 0 ++ ** Then, we flush the commit or journal list, and just return 0 + ** because the rest of journal end was already done for this transaction. + */ + if (atomic_read(&(journal->j_wcount)) > 0) { +@@ -3671,7 +3671,7 @@ static int check_journal_end(struct reis + /* + ** Does all the work that makes deleting blocks safe. + ** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on. +-** ++** + ** otherwise: + ** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes + ** before this transaction has finished. +@@ -3875,7 +3875,7 @@ extern struct tree_balance *cur_tb; + ** be written to disk while we are altering it. So, we must: + ** clean it + ** wait on it. +-** ++** + */ + int reiserfs_prepare_for_journal(struct super_block *p_s_sb, + struct buffer_head *bh, int wait) +@@ -3917,7 +3917,7 @@ static void flush_old_journal_lists(stru + } + } + +-/* ++/* + ** long and ugly. If flush, will not return until all commit + ** blocks and all real buffers in the trans are on disk. 
+ ** If no_async, won't return until all commit blocks are on disk. +@@ -3978,7 +3978,7 @@ static int do_journal_end(struct reiserf + wait_on_commit = 1; + } + +- /* check_journal_end locks the journal, and unlocks if it does not return 1 ++ /* check_journal_end locks the journal, and unlocks if it does not return 1 + ** it tells us if we should continue with the journal_end, or just return + */ + if (!check_journal_end(th, p_s_sb, nblocks, flags)) { +@@ -4075,7 +4075,7 @@ static int do_journal_end(struct reiserf + last_cn->next = jl_cn; + } + last_cn = jl_cn; +- /* make sure the block we are trying to log is not a block ++ /* make sure the block we are trying to log is not a block + of journal or reserved area */ + + if (is_block_in_log_or_reserved_area +@@ -4222,9 +4222,9 @@ static int do_journal_end(struct reiserf + } else if (!(jl->j_state & LIST_COMMIT_PENDING)) + queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); + +- /* if the next transaction has any chance of wrapping, flush +- ** transactions that might get overwritten. If any journal lists are very +- ** old flush them as well. ++ /* if the next transaction has any chance of wrapping, flush ++ ** transactions that might get overwritten. If any journal lists are very ++ ** old flush them as well. 
+ */ + first_jl: + list_for_each_safe(entry, safe, &journal->j_journal_list) { +--- a/fs/reiserfs/lbalance.c ++++ b/fs/reiserfs/lbalance.c +@@ -119,8 +119,8 @@ static void leaf_copy_dir_entries(struct + DEH_SIZE * copy_count + copy_records_len); + } + +-/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or +- part of it or nothing (see the return 0 below) from SOURCE to the end ++/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or ++ part of it or nothing (see the return 0 below) from SOURCE to the end + (if last_first) or beginning (!last_first) of the DEST */ + /* returns 1 if anything was copied, else 0 */ + static int leaf_copy_boundary_item(struct buffer_info *dest_bi, +@@ -396,7 +396,7 @@ static void leaf_item_bottle(struct buff + else { + struct item_head n_ih; + +- /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST ++ /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST + part defined by 'cpy_bytes'; create new item header; change old item_header (????); + n_ih = new item_header; + */ +@@ -426,7 +426,7 @@ static void leaf_item_bottle(struct buff + else { + struct item_head n_ih; + +- /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST ++ /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST + part defined by 'cpy_bytes'; create new item header; + n_ih = new item_header; + */ +@@ -724,7 +724,7 @@ int leaf_shift_right(struct tree_balance + static void leaf_delete_items_entirely(struct buffer_info *bi, + int first, int del_num); + /* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR. +- If not. ++ If not. + If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of + the first item. Part defined by del_bytes. 
Don't delete first item header + If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of +@@ -783,7 +783,7 @@ void leaf_delete_items(struct buffer_inf + /* len = body len of item */ + len = ih_item_len(ih); + +- /* delete the part of the last item of the bh ++ /* delete the part of the last item of the bh + do not delete item header + */ + leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1, +@@ -865,7 +865,7 @@ void leaf_insert_into_buf(struct buffer_ + } + } + +-/* paste paste_size bytes to affected_item_num-th item. ++/* paste paste_size bytes to affected_item_num-th item. + When item is a directory, this only prepare space for new entries */ + void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, + int pos_in_item, int paste_size, +@@ -1022,7 +1022,7 @@ static int leaf_cut_entries(struct buffe + /* when cut item is part of regular file + pos_in_item - first byte that must be cut + cut_size - number of bytes to be cut beginning from pos_in_item +- ++ + when cut item is part of directory + pos_in_item - number of first deleted entry + cut_size - count of deleted entries +@@ -1275,7 +1275,7 @@ void leaf_paste_entries(struct buffer_in + /* change item key if necessary (when we paste before 0-th entry */ + if (!before) { + set_le_ih_k_offset(ih, deh_offset(new_dehs)); +-/* memcpy (&ih->ih_key.k_offset, ++/* memcpy (&ih->ih_key.k_offset, + &new_dehs->deh_offset, SHORT_KEY_SIZE);*/ + } + #ifdef CONFIG_REISERFS_CHECK +--- a/fs/reiserfs/namei.c ++++ b/fs/reiserfs/namei.c +@@ -106,7 +106,7 @@ key of the first directory entry in it. + This function first calls search_by_key, then, if item whose first + entry matches is not found it looks for the entry inside directory + item found by search_by_key. 
Fills the path to the entry, and to the +-entry position in the item ++entry position in the item + + */ + +@@ -371,7 +371,7 @@ static struct dentry *reiserfs_lookup(st + return d_splice_alias(inode, dentry); + } + +-/* ++/* + ** looks up the dentry of the parent directory for child. + ** taken from ext2_get_parent + */ +@@ -410,7 +410,7 @@ struct dentry *reiserfs_get_parent(struc + return parent; + } + +-/* add entry to the directory (entry can be hidden). ++/* add entry to the directory (entry can be hidden). + + insert definition of when hidden directories are used here -Hans + +@@ -568,7 +568,7 @@ static int drop_new_inode(struct inode * + return 0; + } + +-/* utility function that does setup for reiserfs_new_inode. ++/* utility function that does setup for reiserfs_new_inode. + ** DQUOT_INIT needs lots of credits so it's better to have it + ** outside of a transaction, so we had to pull some bits of + ** reiserfs_new_inode out into this func. +@@ -823,7 +823,7 @@ static inline int reiserfs_empty_dir(str + { + /* we can cheat because an old format dir cannot have + ** EMPTY_DIR_SIZE, and a new format dir cannot have +- ** EMPTY_DIR_SIZE_V1. So, if the inode is either size, ++ ** EMPTY_DIR_SIZE_V1. So, if the inode is either size, + ** regardless of disk format version, the directory is empty. + */ + if (inode->i_size != EMPTY_DIR_SIZE && +@@ -1163,7 +1163,7 @@ static int reiserfs_link(struct dentry * + return retval; + } + +-// de contains information pointing to an entry which ++/* de contains information pointing to an entry which */ + static int de_still_valid(const char *name, int len, + struct reiserfs_dir_entry *de) + { +@@ -1207,10 +1207,10 @@ static void set_ino_in_dir_entry(struct + de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid; + } + +-/* ++/* + * process, that is going to call fix_nodes/do_balance must hold only + * one path. 
If it holds 2 or more, it can get into endless waiting in +- * get_empty_nodes or its clones ++ * get_empty_nodes or its clones + */ + static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +@@ -1264,7 +1264,7 @@ static int reiserfs_rename(struct inode + + old_inode_mode = old_inode->i_mode; + if (S_ISDIR(old_inode_mode)) { +- // make sure, that directory being renamed has correct ".." ++ // make sure, that directory being renamed has correct ".." + // and that its new parent directory has not too many links + // already + +@@ -1275,8 +1275,8 @@ static int reiserfs_rename(struct inode + } + } + +- /* directory is renamed, its parent directory will be changed, +- ** so find ".." entry ++ /* directory is renamed, its parent directory will be changed, ++ ** so find ".." entry + */ + dot_dot_de.de_gen_number_bit_string = NULL; + retval = +@@ -1386,9 +1386,9 @@ static int reiserfs_rename(struct inode + this stuff, yes? Then, having + gathered everything into RAM we + should lock the buffers, yes? -Hans */ +- /* probably. our rename needs to hold more +- ** than one path at once. The seals would +- ** have to be written to deal with multi-path ++ /* probably. our rename needs to hold more ++ ** than one path at once. The seals would ++ ** have to be written to deal with multi-path + ** issues -chris + */ + /* sanity checking before doing the rename - avoid races many +@@ -1466,7 +1466,7 @@ static int reiserfs_rename(struct inode + } + + if (S_ISDIR(old_inode_mode)) { +- // adjust ".." of renamed directory ++ /* adjust ".." 
of renamed directory */ + set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir)); + journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh); + +--- a/fs/reiserfs/objectid.c ++++ b/fs/reiserfs/objectid.c +@@ -180,7 +180,7 @@ int reiserfs_convert_objectid_map_v1(str + + if (cur_size > new_size) { + /* mark everyone used that was listed as free at the end of the objectid +- ** map ++ ** map + */ + objectid_map[new_size - 1] = objectid_map[cur_size - 1]; + set_sb_oid_cursize(disk_sb, new_size); +--- a/fs/reiserfs/prints.c ++++ b/fs/reiserfs/prints.c +@@ -181,11 +181,11 @@ static char *is_there_reiserfs_struct(ch + appropriative printk. With this reiserfs_warning you can use format + specification for complex structures like you used to do with + printfs for integers, doubles and pointers. For instance, to print +- out key structure you have to write just: +- reiserfs_warning ("bad key %k", key); +- instead of +- printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, +- key->k_offset, key->k_uniqueness); ++ out key structure you have to write just: ++ reiserfs_warning ("bad key %k", key); ++ instead of ++ printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, ++ key->k_offset, key->k_uniqueness); + */ + + static void prepare_error_buf(const char *fmt, va_list args) +@@ -247,11 +247,11 @@ static void prepare_error_buf(const char + } + + /* in addition to usual conversion specifiers this accepts reiserfs +- specific conversion specifiers: +- %k to print little endian key, +- %K to print cpu key, ++ specific conversion specifiers: ++ %k to print little endian key, ++ %K to print cpu key, + %h to print item_head, +- %t to print directory entry ++ %t to print directory entry + %z to print block head (arg must be struct buffer_head * + %b to print buffer_head + */ +@@ -317,17 +317,17 @@ void reiserfs_debug(struct super_block * + maintainer-errorid. Don't bother with reusing errorids, there are + lots of numbers out there. 
+ +- Example: +- ++ Example: ++ + reiserfs_panic( + p_sb, "reiser-29: reiserfs_new_blocknrs: " + "one of search_start or rn(%d) is equal to MAX_B_NUM," +- "which means that we are optimizing location based on the bogus location of a temp buffer (%p).", ++ "which means that we are optimizing location based on the bogus location of a temp buffer (%p).", + rn, bh + ); + + Regular panic()s sometimes clear the screen before the message can +- be read, thus the need for the while loop. ++ be read, thus the need for the while loop. + + Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it + pointless complexity): +--- a/fs/reiserfs/procfs.c ++++ b/fs/reiserfs/procfs.c +@@ -636,7 +636,7 @@ int reiserfs_global_version_in_proc(char + * + */ + +-/* ++/* + * Make Linus happy. + * Local variables: + * c-indentation-style: "K&R" +--- a/fs/reiserfs/README ++++ b/fs/reiserfs/README +@@ -1,4 +1,4 @@ +-[LICENSING] ++[LICENSING] + + ReiserFS is hereby licensed under the GNU General + Public License version 2. +@@ -31,7 +31,7 @@ the GPL as not allowing those additional + it wrongly, and Richard Stallman agrees with me, when carefully read + you can see that those restrictions on additional terms do not apply + to the owner of the copyright, and my interpretation of this shall +-govern for this license. ++govern for this license. + + Finally, nothing in this license shall be interpreted to allow you to + fail to fairly credit me, or to remove my credits, without my +--- a/fs/reiserfs/resize.c ++++ b/fs/reiserfs/resize.c +@@ -1,8 +1,8 @@ +-/* ++/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +-/* ++/* + * Written by Alexander Zarochentcev. + * + * The kernel part of the (on-line) reiserfs resizer. 
+@@ -101,7 +101,7 @@ int reiserfs_resize(struct super_block * + memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size); + + /* just in case vfree schedules on us, copy the new +- ** pointer into the journal struct before freeing the ++ ** pointer into the journal struct before freeing the + ** old one + */ + node_tmp = jb->bitmaps; +--- a/fs/reiserfs/stree.c ++++ b/fs/reiserfs/stree.c +@@ -77,7 +77,7 @@ inline void copy_item_head(struct item_h + /* k1 is pointer to on-disk structure which is stored in little-endian + form. k2 is pointer to cpu variable. For key of items of the same + object this returns 0. +- Returns: -1 if key1 < key2 ++ Returns: -1 if key1 < key2 + 0 if key1 == key2 + 1 if key1 > key2 */ + inline int comp_short_keys(const struct reiserfs_key *le_key, +@@ -890,7 +890,7 @@ static inline int prepare_for_direct_ite + } + // new file gets truncated + if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { +- // ++ // + round_len = ROUND_UP(new_file_length); + /* this was n_new_file_length < le_ih ... */ + if (round_len < le_ih_k_offset(le_ih)) { +@@ -1443,7 +1443,7 @@ static int maybe_indirect_to_direct(stru + if (atomic_read(&p_s_inode->i_count) > 1 || + !tail_has_to_be_packed(p_s_inode) || + !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) { +- // leave tail in an unformatted node ++ /* leave tail in an unformatted node */ + *p_c_mode = M_SKIP_BALANCING; + cut_bytes = + n_block_size - (n_new_file_size & (n_block_size - 1)); +@@ -1826,7 +1826,7 @@ int reiserfs_do_truncate(struct reiserfs + /* While there are bytes to truncate and previous file item is presented in the tree. */ + + /* +- ** This loop could take a really long time, and could log ++ ** This loop could take a really long time, and could log + ** many more blocks than a transaction can hold. 
So, we do a polite + ** journal end here, and if the transaction needs ending, we make + ** sure the file is consistent before ending the current trans +--- a/fs/reiserfs/super.c ++++ b/fs/reiserfs/super.c +@@ -754,7 +754,7 @@ static int reiserfs_getopt(struct super_ + char **opt_arg, unsigned long *bit_flags) + { + char *p; +- /* foo=bar, ++ /* foo=bar, + ^ ^ ^ + | | +-- option_end + | +-- arg_start +@@ -1346,7 +1346,7 @@ static int read_super_block(struct super + } + // + // ok, reiserfs signature (old or new) found in at the given offset +- // ++ // + fs_blocksize = sb_blocksize(rs); + brelse(bh); + sb_set_blocksize(s, fs_blocksize); +@@ -1532,8 +1532,8 @@ static int what_hash(struct super_block + code = find_hash_out(s); + + if (code != UNSET_HASH && reiserfs_hash_detect(s)) { +- /* detection has found the hash, and we must check against the +- ** mount options ++ /* detection has found the hash, and we must check against the ++ ** mount options + */ + if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { + reiserfs_warning(s, "reiserfs-2507", +@@ -1565,7 +1565,7 @@ static int what_hash(struct super_block + } + } + +- /* if we are mounted RW, and we have a new valid hash code, update ++ /* if we are mounted RW, and we have a new valid hash code, update + ** the super + */ + if (code != UNSET_HASH && +--- a/fs/reiserfs/tail_conversion.c ++++ b/fs/reiserfs/tail_conversion.c +@@ -46,7 +46,7 @@ int direct2indirect(struct reiserfs_tran + /* Set the key to search for the place for new unfm pointer */ + make_cpu_key(&end_key, inode, tail_offset, TYPE_INDIRECT, 4); + +- // FIXME: we could avoid this ++ /* FIXME: we could avoid this */ + if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) { + reiserfs_error(sb, "PAP-14030", + "pasted or inserted byte exists in " +--- a/include/linux/reiserfs_fs_sb.h ++++ b/include/linux/reiserfs_fs_sb.h +@@ -14,7 +14,7 @@ typedef enum { + } reiserfs_super_block_flags; + + /* struct reiserfs_super_block 
accessors/mutators +- * since this is a disk structure, it will always be in ++ * since this is a disk structure, it will always be in + * little endian format. */ + #define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count)) + #define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v)) +@@ -83,16 +83,16 @@ typedef enum { + + /* LOGGING -- */ + +-/* These all interelate for performance. ++/* These all interelate for performance. + ** +-** If the journal block count is smaller than n transactions, you lose speed. ++** If the journal block count is smaller than n transactions, you lose speed. + ** I don't know what n is yet, I'm guessing 8-16. + ** + ** typical transaction size depends on the application, how often fsync is +-** called, and how many metadata blocks you dirty in a 30 second period. ++** called, and how many metadata blocks you dirty in a 30 second period. + ** The more small files (<16k) you use, the larger your transactions will + ** be. +-** ++** + ** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal + ** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough + ** to prevent wrapping before dirty meta blocks get to disk. 
+@@ -241,7 +241,7 @@ struct reiserfs_journal { + + struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; /* array of bitmaps to record the deleted blocks */ + struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; /* hash table for real buffer heads in current trans */ +- struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all ++ struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all + the transactions */ + struct list_head j_prealloc_list; /* list of inodes which have preallocated blocks */ + int j_persistent_trans; +@@ -425,7 +425,7 @@ enum reiserfs_mount_options { + partition will be dealt with in a + manner of 3.5.x */ + +-/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting ++/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting + ** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option + ** is not required. If the normal autodection code can't determine which + ** hash to use (because both hashes had the same value for a file) diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-use-generic-xattr-handlers.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-use-generic-xattr-handlers.diff new file mode 100644 index 0000000000..d197a09523 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-use-generic-xattr-handlers.diff @@ -0,0 +1,1142 @@ +Subject: reiserfs: use generic xattr handlers +From: Jeff Mahoney + + Christoph Hellwig had asked me quite some time ago to port the reiserfs + xattrs to the generic xattr interface. + + This patch replaces the reiserfs-specific xattr handling code with the + generic struct xattr_handler. + + However, since reiserfs doesn't split the prefix and name when accessing + xattrs, it can't leverage generic_{set,get,list,remove}xattr without + needlessly reconstructing the name on the back end. 
+ + Update 7/26/07: Added missing dput() to deletion path. + Update 8/30/07: Added missing mark_inode_dirty when i_mode is used to + represent an ACL and no previous ACL existed. + +Signed-off-by: Jeff Mahoney + +--- + + fs/reiserfs/super.c | 7 + fs/reiserfs/xattr.c | 467 ++++++++++++++++------------------------- + fs/reiserfs/xattr_acl.c | 79 ++---- + fs/reiserfs/xattr_security.c | 26 -- + fs/reiserfs/xattr_trusted.c | 45 --- + fs/reiserfs/xattr_user.c | 31 -- + include/linux/reiserfs_acl.h | 16 - + include/linux/reiserfs_fs_sb.h | 3 + include/linux/reiserfs_xattr.h | 25 -- + 9 files changed, 258 insertions(+), 441 deletions(-) + +--- a/fs/reiserfs/super.c ++++ b/fs/reiserfs/super.c +@@ -2261,9 +2261,6 @@ static int __init init_reiserfs_fs(void) + return ret; + } + +- if ((ret = reiserfs_xattr_register_handlers())) +- goto failed_reiserfs_xattr_register_handlers; +- + reiserfs_proc_info_global_init(); + reiserfs_proc_register_global("version", + reiserfs_global_version_in_proc); +@@ -2274,9 +2271,6 @@ static int __init init_reiserfs_fs(void) + return 0; + } + +- reiserfs_xattr_unregister_handlers(); +- +- failed_reiserfs_xattr_register_handlers: + reiserfs_proc_unregister_global("version"); + reiserfs_proc_info_global_done(); + destroy_inodecache(); +@@ -2286,7 +2280,6 @@ static int __init init_reiserfs_fs(void) + + static void __exit exit_reiserfs_fs(void) + { +- reiserfs_xattr_unregister_handlers(); + reiserfs_proc_unregister_global("version"); + reiserfs_proc_info_global_done(); + unregister_filesystem(&reiserfs_fs_type); +--- a/fs/reiserfs/xattr_acl.c ++++ b/fs/reiserfs/xattr_acl.c +@@ -271,7 +271,7 @@ reiserfs_set_acl(struct inode *inode, in + char *name; + void *value = NULL; + struct posix_acl **p_acl; +- size_t size; ++ size_t size = 0; + int error; + struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); + +@@ -308,16 +308,21 @@ reiserfs_set_acl(struct inode *inode, in + value = posix_acl_to_disk(acl, &size); + if (IS_ERR(value)) + return 
(int)PTR_ERR(value); +- error = reiserfs_xattr_set(inode, name, value, size, 0); +- } else { +- error = reiserfs_xattr_del(inode, name); +- if (error == -ENODATA) { +- /* This may seem odd here, but it means that the ACL was set +- * with a value representable with mode bits. If there was +- * an ACL before, reiserfs_xattr_del already dirtied the inode. +- */ ++ } ++ ++ error = __reiserfs_xattr_set(inode, name, value, size, 0); ++ ++ /* ++ * Ensure that the inode gets dirtied if we're only using ++ * the mode bits and an old ACL didn't exist. We don't need ++ * to check if the inode is hashed here since we won't get ++ * called by reiserfs_inherit_default_acl(). ++ */ ++ if (error == -ENODATA) { ++ error = 0; ++ if (type == ACL_TYPE_ACCESS) { ++ inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); +- error = 0; + } + } + +@@ -474,33 +479,22 @@ posix_acl_access_set(struct inode *inode + return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); + } + +-static int posix_acl_access_del(struct inode *inode, const char *name) +-{ +- struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); +- if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1) +- return -EINVAL; +- iset_acl(inode, &reiserfs_i->i_acl_access, ERR_PTR(-ENODATA)); +- return 0; +-} +- +-static int +-posix_acl_access_list(struct inode *inode, const char *name, int namelen, +- char *out) ++static size_t posix_acl_access_list(struct inode *inode, char *list, ++ size_t list_size, const char *name, ++ size_t name_len) + { +- int len = namelen; ++ const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); + if (!reiserfs_posixacl(inode->i_sb)) + return 0; +- if (out) +- memcpy(out, name, len); +- +- return len; ++ if (list && size <= list_size) ++ memcpy(list, POSIX_ACL_XATTR_ACCESS, size); ++ return size; + } + +-struct reiserfs_xattr_handler posix_acl_access_handler = { ++struct xattr_handler reiserfs_posix_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .get = posix_acl_access_get, + .set = 
posix_acl_access_set, +- .del = posix_acl_access_del, + .list = posix_acl_access_list, + }; + +@@ -522,32 +516,21 @@ posix_acl_default_set(struct inode *inod + return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); + } + +-static int posix_acl_default_del(struct inode *inode, const char *name) +-{ +- struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); +- if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) +- return -EINVAL; +- iset_acl(inode, &reiserfs_i->i_acl_default, ERR_PTR(-ENODATA)); +- return 0; +-} +- +-static int +-posix_acl_default_list(struct inode *inode, const char *name, int namelen, +- char *out) ++static size_t posix_acl_default_list(struct inode *inode, char *list, ++ size_t list_size, const char *name, ++ size_t name_len) + { +- int len = namelen; ++ const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); + if (!reiserfs_posixacl(inode->i_sb)) + return 0; +- if (out) +- memcpy(out, name, len); +- +- return len; ++ if (list && size <= list_size) ++ memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); ++ return size; + } + +-struct reiserfs_xattr_handler posix_acl_default_handler = { ++struct xattr_handler reiserfs_posix_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .get = posix_acl_default_get, + .set = posix_acl_default_set, +- .del = posix_acl_default_del, + .list = posix_acl_default_list, + }; +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -53,7 +53,6 @@ + #define PRIVROOT_NAME ".reiserfs_priv" + #define XAROOT_NAME "xattrs" + +-static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char *); + + /* Helpers for inode ops. We do this so that we don't have all the VFS + * overhead and also for proper i_mutex annotation. +@@ -110,7 +109,6 @@ static int xattr_rmdir(struct inode *dir + return error; + } + +- + #define xattr_may_create(flags) (!flags || flags & XATTR_CREATE) + + /* Returns and possibly creates the xattr dir. 
*/ +@@ -339,14 +337,17 @@ int xattr_readdir(struct inode *inode, f + return res; + } + +-/* expects xadir->d_inode->i_mutex to be locked */ ++/* The following are side effects of other operations that aren't explicitly ++ * modifying extended attributes. This includes operations such as permissions ++ * or ownership changes, object deletions, etc. */ ++ + static int +-__reiserfs_xattr_del(struct dentry *xadir, const char *name, int namelen) ++reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen, ++ loff_t offset, u64 ino, unsigned int d_type) + { ++ struct dentry *xadir = (struct dentry *)buf; + struct dentry *dentry; +- struct inode *dir = xadir->d_inode; + int err = 0; +- struct reiserfs_xattr_handler *xah; + + dentry = lookup_one_len(name, xadir, namelen); + if (IS_ERR(dentry)) { +@@ -361,28 +362,7 @@ __reiserfs_xattr_del(struct dentry *xadi + if (S_ISDIR(dentry->d_inode->i_mode)) + goto out_file; + +- if (!IS_PRIVATE(dentry->d_inode)) { +- reiserfs_error(dir->i_sb, "jdm-20003", +- "OID %08x [%.*s/%.*s] doesn't have " +- "priv flag set [parent is %sset].", +- le32_to_cpu(INODE_PKEY(dentry->d_inode)-> +- k_objectid), xadir->d_name.len, +- xadir->d_name.name, namelen, name, +- IS_PRIVATE(xadir->d_inode) ? "" : +- "not "); +- dput(dentry); +- return -EIO; +- } +- +- /* Deletion pre-operation */ +- xah = find_xattr_handler_prefix(name); +- if (xah && xah->del) { +- err = xah->del(dentry->d_inode, name); +- if (err) +- goto out; +- } +- +- err = xattr_unlink(dir, dentry); ++ err = xattr_unlink(xadir->d_inode, dentry); + + out_file: + dput(dentry); +@@ -391,20 +371,6 @@ out: + return err; + } + +-/* The following are side effects of other operations that aren't explicitly +- * modifying extended attributes. This includes operations such as permissions +- * or ownership changes, object deletions, etc. 
*/ +- +-static int +-reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen, +- loff_t offset, u64 ino, unsigned int d_type) +-{ +- struct dentry *xadir = (struct dentry *)buf; +- +- return __reiserfs_xattr_del(xadir, name, namelen); +- +-} +- + /* This is called w/ inode->i_mutex downed */ + int reiserfs_delete_xattrs(struct inode *inode) + { +@@ -541,14 +507,11 @@ out: + } + + #ifdef CONFIG_REISERFS_FS_XATTR +-static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char +- *prefix); +- + /* Returns a dentry corresponding to a specific extended attribute file + * for the inode. If flags allow, the file is created. Otherwise, a + * valid or negative dentry, or an error is returned. */ +-static struct dentry *get_xa_file_dentry(const struct inode *inode, +- const char *name, int flags) ++static struct dentry *xattr_lookup(struct inode *inode, const char *name, ++ int flags) + { + struct dentry *xadir, *xafile; + int err = 0; +@@ -623,6 +586,45 @@ int reiserfs_commit_write(struct file *f + int reiserfs_prepare_write(struct file *f, struct page *page, + unsigned from, unsigned to); + ++static void update_ctime(struct inode *inode) ++{ ++ struct timespec now = current_fs_time(inode->i_sb); ++ if (hlist_unhashed(&inode->i_hash) || !inode->i_nlink || ++ timespec_equal(&inode->i_ctime, &now)) ++ return; ++ ++ inode->i_ctime = CURRENT_TIME_SEC; ++ mark_inode_dirty(inode); ++} ++ ++static int lookup_and_delete_xattr(struct inode *inode, const char *name) ++{ ++ int err = 0; ++ struct dentry *dentry, *xadir; ++ ++ xadir = open_xa_dir(inode, XATTR_REPLACE); ++ if (IS_ERR(xadir)) ++ return PTR_ERR(xadir); ++ ++ dentry = lookup_one_len(name, xadir, strlen(name)); ++ if (IS_ERR(dentry)) { ++ err = PTR_ERR(dentry); ++ goto out_dput; ++ } ++ ++ if (dentry->d_inode) { ++ mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR); ++ err = xattr_unlink(xadir->d_inode, dentry); ++ mutex_unlock(&xadir->d_inode->i_mutex); ++ update_ctime(inode); ++ } ++ ++ 
dput(dentry); ++out_dput: ++ dput(xadir); ++ return err; ++} ++ + + /* Generic extended attribute operations that can be used by xa plugins */ + +@@ -630,8 +632,8 @@ int reiserfs_prepare_write(struct file * + * inode->i_mutex: down + */ + int +-reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, +- size_t buffer_size, int flags) ++__reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, ++ size_t buffer_size, int flags) + { + int err = 0; + struct dentry *dentry; +@@ -639,37 +641,22 @@ reiserfs_xattr_set(struct inode *inode, + char *data; + size_t file_pos = 0; + size_t buffer_pos = 0; +- struct iattr newattrs; ++ size_t new_size; + __u32 xahash = 0; + + if (get_inode_sd_version(inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + + if (!buffer) +- return reiserfs_xattr_del(inode, name); ++ return lookup_and_delete_xattr(inode, name); + +- dentry = get_xa_file_dentry(inode, name, flags); +- if (IS_ERR(dentry)) { +- err = PTR_ERR(dentry); +- goto out; +- } ++ dentry = xattr_lookup(inode, name, flags); ++ if (IS_ERR(dentry)) ++ return PTR_ERR(dentry); + + down_write(&REISERFS_I(inode)->i_xattr_sem); + + xahash = xattr_hash(buffer, buffer_size); +- +- /* Resize it so we're ok to write there */ +- newattrs.ia_size = buffer_size; +- newattrs.ia_ctime = current_fs_time(inode->i_sb); +- newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; +- mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); +- down_write(&dentry->d_inode->i_alloc_sem); +- err = reiserfs_setattr(dentry, &newattrs); +- up_write(&dentry->d_inode->i_alloc_sem); +- mutex_unlock(&dentry->d_inode->i_mutex); +- if (err) +- goto out_filp; +- + while (buffer_pos < buffer_size || buffer_pos == 0) { + size_t chunk; + size_t skip = 0; +@@ -682,7 +669,7 @@ reiserfs_xattr_set(struct inode *inode, + page = reiserfs_get_page(dentry->d_inode, file_pos); + if (IS_ERR(page)) { + err = PTR_ERR(page); +- goto out_filp; ++ goto out_unlock; + } + + lock_page(page); +@@ -716,20 
+703,33 @@ reiserfs_xattr_set(struct inode *inode, + break; + } + +- /* We can't mark the inode dirty if it's not hashed. This is the case +- * when we're inheriting the default ACL. If we dirty it, the inode +- * gets marked dirty, but won't (ever) make it onto the dirty list until +- * it's synced explicitly to clear I_DIRTY. This is bad. */ +- if (!hlist_unhashed(&inode->i_hash)) { +- inode->i_ctime = CURRENT_TIME_SEC; +- mark_inode_dirty(inode); +- } +- +- out_filp: ++ new_size = buffer_size + sizeof(struct reiserfs_xattr_header); ++ if (!err && new_size < i_size_read(dentry->d_inode)) { ++ struct iattr newattrs = { ++ .ia_ctime = current_fs_time(inode->i_sb), ++ .ia_size = buffer_size, ++ .ia_valid = ATTR_SIZE | ATTR_CTIME, ++ }; ++ mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); ++ down_write(&dentry->d_inode->i_alloc_sem); ++ err = reiserfs_setattr(dentry, &newattrs); ++ up_write(&dentry->d_inode->i_alloc_sem); ++ mutex_unlock(&dentry->d_inode->i_mutex); ++ } else ++ update_ctime(inode); ++out_unlock: + up_write(&REISERFS_I(inode)->i_xattr_sem); + dput(dentry); ++ return err; ++} + +- out: ++int ++reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, ++ size_t buffer_size, int flags) ++{ ++ int err = __reiserfs_xattr_set(inode, name, buffer, buffer_size, flags); ++ if (err == -ENODATA) ++ err = 0; + return err; + } + +@@ -737,7 +737,7 @@ reiserfs_xattr_set(struct inode *inode, + * inode->i_mutex: down + */ + int +-reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer, ++reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer, + size_t buffer_size) + { + ssize_t err = 0; +@@ -756,7 +756,7 @@ reiserfs_xattr_get(const struct inode *i + if (get_inode_sd_version(inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + +- dentry = get_xa_file_dentry(inode, name, XATTR_REPLACE); ++ dentry = xattr_lookup(inode, name, XATTR_REPLACE); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto out; +@@ 
-837,32 +837,53 @@ out: + return err; + } + +-int reiserfs_xattr_del(struct inode *inode, const char *name) +-{ +- struct dentry *dir; +- int err; ++/* Actual operations that are exported to VFS-land */ ++struct xattr_handler *reiserfs_xattr_handlers[] = { ++ &reiserfs_xattr_user_handler, ++ &reiserfs_xattr_trusted_handler, ++#ifdef CONFIG_REISERFS_FS_SECURITY ++ &reiserfs_xattr_security_handler, ++#endif ++#ifdef CONFIG_REISERFS_FS_POSIX_ACL ++ &reiserfs_posix_acl_access_handler, ++ &reiserfs_posix_acl_default_handler, ++#endif ++ NULL ++}; + +- dir = open_xa_dir(inode, XATTR_REPLACE); +- if (IS_ERR(dir)) { +- err = PTR_ERR(dir); +- goto out; +- } ++/* ++ * In order to implement different sets of xattr operations for each xattr ++ * prefix with the generic xattr API, a filesystem should create a ++ * null-terminated array of struct xattr_handler (one for each prefix) and ++ * hang a pointer to it off of the s_xattr field of the superblock. ++ * ++ * The generic_fooxattr() functions will use this list to dispatch xattr ++ * operations to the correct xattr_handler. 
++ */ ++#define for_each_xattr_handler(handlers, handler) \ ++ for ((handler) = *(handlers)++; \ ++ (handler) != NULL; \ ++ (handler) = *(handlers)++) + +- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); +- err = __reiserfs_xattr_del(dir, name, strlen(name)); +- mutex_unlock(&dir->d_inode->i_mutex); +- dput(dir); ++/* This is the implementation for the xattr plugin infrastructure */ ++static inline struct xattr_handler * ++find_xattr_handler_prefix(struct xattr_handler **handlers, ++ const char *name) ++{ ++ struct xattr_handler *xah; + +- if (!err) { +- inode->i_ctime = CURRENT_TIME_SEC; +- mark_inode_dirty(inode); ++ if (!handlers) ++ return NULL; ++ ++ for_each_xattr_handler(handlers, xah) { ++ if (strncmp(xah->prefix, name, strlen(xah->prefix)) == 0) ++ break; + } + +- out: +- return err; ++ return xah; + } + +-/* Actual operations that are exported to VFS-land */ ++ + /* + * Inode operation getxattr() + */ +@@ -870,15 +891,15 @@ ssize_t + reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, + size_t size) + { +- struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name); +- int err; ++ struct inode *inode = dentry->d_inode; ++ struct xattr_handler *handler; + +- if (!xah || !reiserfs_xattrs(dentry->d_sb) || +- get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) ++ handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); ++ ++ if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + +- err = xah->get(dentry->d_inode, name, buffer, size); +- return err; ++ return handler->get(inode, name, buffer, size); + } + + /* +@@ -890,15 +911,15 @@ int + reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) + { +- struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name); +- int err; ++ struct inode *inode = dentry->d_inode; ++ struct xattr_handler *handler; + +- if (!xah || !reiserfs_xattrs(dentry->d_sb) || +- 
get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) ++ handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); ++ ++ if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + +- err = xah->set(dentry->d_inode, name, value, size, flags); +- return err; ++ return handler->set(inode, name, value, size, flags); + } + + /* +@@ -908,71 +929,65 @@ reiserfs_setxattr(struct dentry *dentry, + */ + int reiserfs_removexattr(struct dentry *dentry, const char *name) + { +- int err; +- struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name); ++ struct inode *inode = dentry->d_inode; ++ struct xattr_handler *handler; ++ handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); + +- if (!xah || !reiserfs_xattrs(dentry->d_sb) || +- get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) ++ if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) + return -EOPNOTSUPP; + +- err = reiserfs_xattr_del(dentry->d_inode, name); +- +- dentry->d_inode->i_ctime = CURRENT_TIME_SEC; +- mark_inode_dirty(dentry->d_inode); +- +- return err; ++ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); + } + +-/* This is what filldir will use: +- * r_pos will always contain the amount of space required for the entire +- * list. If r_pos becomes larger than r_size, we need more space and we +- * return an error indicating this. 
If r_pos is less than r_size, then we've +- * filled the buffer successfully and we return success */ +-struct reiserfs_listxattr_buf { +- int r_pos; +- int r_size; +- char *r_buf; +- struct inode *r_inode; ++struct listxattr_buf { ++ size_t size; ++ size_t pos; ++ char *buf; ++ struct inode *inode; + }; + +-static int +-reiserfs_listxattr_filler(void *buf, const char *name, int namelen, +- loff_t offset, u64 ino, unsigned int d_type) ++static int listxattr_filler(void *buf, const char *name, int namelen, ++ loff_t offset, u64 ino, unsigned int d_type) + { +- struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf; +- int len = 0; +- if (name[0] != '.' +- || (namelen != 1 && (name[1] != '.' || namelen != 2))) { +- struct reiserfs_xattr_handler *xah = +- find_xattr_handler_prefix(name); +- if (!xah) +- return 0; /* Unsupported xattr name, skip it */ +- +- /* We call ->list() twice because the operation isn't required to just +- * return the name back - we want to make sure we have enough space */ +- len += xah->list(b->r_inode, name, namelen, NULL); +- +- if (len) { +- if (b->r_pos + len + 1 <= b->r_size) { +- char *p = b->r_buf + b->r_pos; +- p += xah->list(b->r_inode, name, namelen, p); +- *p++ = '\0'; +- } +- b->r_pos += len + 1; ++ struct listxattr_buf *b = (struct listxattr_buf *)buf; ++ size_t size; ++ if (name[0] != '.' || ++ (namelen != 1 && (name[1] != '.' || namelen != 2))) { ++ struct xattr_handler *handler; ++ handler = find_xattr_handler_prefix(b->inode->i_sb->s_xattr, ++ name); ++ if (!handler) /* Unsupported xattr name */ ++ return 0; ++ if (b->buf) { ++ size = handler->list(b->inode, b->buf + b->pos, ++ b->size, name, namelen); ++ if (size > b->size) ++ return -ERANGE; ++ } else { ++ size = handler->list(b->inode, NULL, 0, name, namelen); + } +- } + ++ b->pos += size; ++ } + return 0; + } + + /* + * Inode operation listxattr() ++ * ++ * We totally ignore the generic listxattr here because it would be stupid ++ * not to. 
Since the xattrs are organized in a directory, we can just ++ * readdir to find them. + */ + ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) + { + struct dentry *dir; + int err = 0; +- struct reiserfs_listxattr_buf buf; ++ struct listxattr_buf buf = { ++ .inode = dentry->d_inode, ++ .buf = buffer, ++ .size = buffer ? size : 0, ++ }; + + if (!dentry->d_inode) + return -EINVAL; +@@ -985,120 +1000,22 @@ ssize_t reiserfs_listxattr(struct dentry + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + if (err == -ENODATA) +- err = 0; /* Not an error if there aren't any xattrs */ ++ err = 0; /* Not an error if there aren't any xattrs */ + goto out; + } + +- buf.r_buf = buffer; +- buf.r_size = buffer ? size : 0; +- buf.r_pos = 0; +- buf.r_inode = dentry->d_inode; +- + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); +- err = xattr_readdir(dir->d_inode, reiserfs_listxattr_filler, &buf); ++ err = xattr_readdir(dir->d_inode, listxattr_filler, &buf); + mutex_unlock(&dir->d_inode->i_mutex); + +- if (!err) { +- if (buf.r_pos > buf.r_size && buffer != NULL) +- err = -ERANGE; +- else +- err = buf.r_pos; +- } ++ if (!err) ++ err = buf.pos; + + dput(dir); + out: + return err; + } + +-/* This is the implementation for the xattr plugin infrastructure */ +-static LIST_HEAD(xattr_handlers); +-static DEFINE_RWLOCK(handler_lock); +- +-static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char +- *prefix) +-{ +- struct reiserfs_xattr_handler *xah = NULL; +- struct list_head *p; +- +- read_lock(&handler_lock); +- list_for_each(p, &xattr_handlers) { +- xah = list_entry(p, struct reiserfs_xattr_handler, handlers); +- if (strncmp(xah->prefix, prefix, strlen(xah->prefix)) == 0) +- break; +- xah = NULL; +- } +- +- read_unlock(&handler_lock); +- return xah; +-} +- +-static void __unregister_handlers(void) +-{ +- struct reiserfs_xattr_handler *xah; +- struct list_head *p, *tmp; +- +- list_for_each_safe(p, tmp, &xattr_handlers) { +- xah = list_entry(p, 
struct reiserfs_xattr_handler, handlers); +- if (xah->exit) +- xah->exit(); +- +- list_del_init(p); +- } +- INIT_LIST_HEAD(&xattr_handlers); +-} +- +-int __init reiserfs_xattr_register_handlers(void) +-{ +- int err = 0; +- struct reiserfs_xattr_handler *xah; +- struct list_head *p; +- +- write_lock(&handler_lock); +- +- /* If we're already initialized, nothing to do */ +- if (!list_empty(&xattr_handlers)) { +- write_unlock(&handler_lock); +- return 0; +- } +- +- /* Add the handlers */ +- list_add_tail(&user_handler.handlers, &xattr_handlers); +- list_add_tail(&trusted_handler.handlers, &xattr_handlers); +-#ifdef CONFIG_REISERFS_FS_SECURITY +- list_add_tail(&security_handler.handlers, &xattr_handlers); +-#endif +-#ifdef CONFIG_REISERFS_FS_POSIX_ACL +- list_add_tail(&posix_acl_access_handler.handlers, &xattr_handlers); +- list_add_tail(&posix_acl_default_handler.handlers, &xattr_handlers); +-#endif +- +- /* Run initializers, if available */ +- list_for_each(p, &xattr_handlers) { +- xah = list_entry(p, struct reiserfs_xattr_handler, handlers); +- if (xah->init) { +- err = xah->init(); +- if (err) { +- list_del_init(p); +- break; +- } +- } +- } +- +- /* Clean up other handlers, if any failed */ +- if (err) +- __unregister_handlers(); +- +- write_unlock(&handler_lock); +- return err; +-} +- +-void reiserfs_xattr_unregister_handlers(void) +-{ +- write_lock(&handler_lock); +- __unregister_handlers(); +- write_unlock(&handler_lock); +-} +- + static int reiserfs_check_acl(struct inode *inode, int mask) + { + struct posix_acl *acl; +@@ -1157,20 +1074,16 @@ static int xattr_mount_check(struct supe + { + /* We need generation numbers to ensure that the oid mapping is correct + * v3.5 filesystems don't have them. */ +- if (!old_format_only(s)) { +- set_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); +- } else if (reiserfs_xattrs_optional(s)) { +- /* Old format filesystem, but optional xattrs have been enabled +- * at mount time. Error out. 
*/ +- reiserfs_warning(s, "jdm-20005", +- "xattrs/ACLs not supported on pre v3.6 " +- "format filesystem. Failing mount."); +- return -EOPNOTSUPP; +- } else { +- /* Old format filesystem, but no optional xattrs have +- * been enabled. This means we silently disable xattrs +- * on the filesystem. */ +- clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); ++ if (old_format_only(s)) { ++ if (reiserfs_xattrs_optional(s)) { ++ /* Old format filesystem, but optional xattrs have ++ * been enabled. Error out. */ ++ reiserfs_warning(s, "jdm-2005", ++ "xattrs/ACLs not supported " ++ "on pre-v3.6 format filesystems. " ++ "Failing mount."); ++ return -EOPNOTSUPP; ++ } + } + + return 0; +@@ -1251,9 +1164,11 @@ int reiserfs_xattr_init(struct super_blo + } + + #ifdef CONFIG_REISERFS_FS_XATTR ++ if (!err) ++ s->s_xattr = reiserfs_xattr_handlers; ++ + error: + if (err) { +- clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); + clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt)); + clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt)); + } +--- a/fs/reiserfs/xattr_security.c ++++ b/fs/reiserfs/xattr_security.c +@@ -31,35 +31,25 @@ security_set(struct inode *inode, const + return reiserfs_xattr_set(inode, name, buffer, size, flags); + } + +-static int security_del(struct inode *inode, const char *name) ++static size_t security_list(struct inode *inode, char *list, size_t list_len, ++ const char *name, size_t namelen) + { +- if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) +- return -EINVAL; +- +- if (IS_PRIVATE(inode)) +- return -EPERM; +- +- return 0; +-} +- +-static int +-security_list(struct inode *inode, const char *name, int namelen, char *out) +-{ +- int len = namelen; ++ const size_t len = namelen + 1; + + if (IS_PRIVATE(inode)) + return 0; + +- if (out) +- memcpy(out, name, len); ++ if (list && len <= list_len) { ++ memcpy(list, name, namelen); ++ list[namelen] = '\0'; ++ } + + return len; + } + +-struct reiserfs_xattr_handler 
security_handler = { ++struct xattr_handler reiserfs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .get = security_get, + .set = security_set, +- .del = security_del, + .list = security_list, + }; +--- a/fs/reiserfs/xattr_trusted.c ++++ b/fs/reiserfs/xattr_trusted.c +@@ -13,10 +13,7 @@ trusted_get(struct inode *inode, const c + if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) + return -EINVAL; + +- if (!reiserfs_xattrs(inode->i_sb)) +- return -EOPNOTSUPP; +- +- if (!(capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))) ++ if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) + return -EPERM; + + return reiserfs_xattr_get(inode, name, buffer, size); +@@ -29,50 +26,30 @@ trusted_set(struct inode *inode, const c + if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) + return -EINVAL; + +- if (!reiserfs_xattrs(inode->i_sb)) +- return -EOPNOTSUPP; +- +- if (!(capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))) ++ if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) + return -EPERM; + + return reiserfs_xattr_set(inode, name, buffer, size, flags); + } + +-static int trusted_del(struct inode *inode, const char *name) ++static size_t trusted_list(struct inode *inode, char *list, size_t list_size, ++ const char *name, size_t name_len) + { +- if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) +- return -EINVAL; ++ const size_t len = name_len + 1; + +- if (!reiserfs_xattrs(inode->i_sb)) +- return -EOPNOTSUPP; +- +- if (!(capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))) +- return -EPERM; +- +- return 0; +-} +- +-static int +-trusted_list(struct inode *inode, const char *name, int namelen, char *out) +-{ +- int len = namelen; +- +- if (!reiserfs_xattrs(inode->i_sb)) ++ if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) + return 0; + +- if (!(capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))) +- return 0; +- +- if (out) +- memcpy(out, name, len); +- ++ if (list && len <= list_size) { ++ memcpy(list, name, name_len); ++ list[name_len] = '\0'; ++ } + return len; + } + +-struct 
reiserfs_xattr_handler trusted_handler = { ++struct xattr_handler reiserfs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .get = trusted_get, + .set = trusted_set, +- .del = trusted_del, + .list = trusted_list, + }; +--- a/fs/reiserfs/xattr_user.c ++++ b/fs/reiserfs/xattr_user.c +@@ -6,10 +6,6 @@ + #include + #include + +-#ifdef CONFIG_REISERFS_FS_POSIX_ACL +-# include +-#endif +- + static int + user_get(struct inode *inode, const char *name, void *buffer, size_t size) + { +@@ -25,7 +21,6 @@ static int + user_set(struct inode *inode, const char *name, const void *buffer, + size_t size, int flags) + { +- + if (strlen(name) < sizeof(XATTR_USER_PREFIX)) + return -EINVAL; + +@@ -34,33 +29,23 @@ user_set(struct inode *inode, const char + return reiserfs_xattr_set(inode, name, buffer, size, flags); + } + +-static int user_del(struct inode *inode, const char *name) ++static size_t user_list(struct inode *inode, char *list, size_t list_size, ++ const char *name, size_t name_len) + { +- if (strlen(name) < sizeof(XATTR_USER_PREFIX)) +- return -EINVAL; +- +- if (!reiserfs_xattrs_user(inode->i_sb)) +- return -EOPNOTSUPP; +- return 0; +-} ++ const size_t len = name_len + 1; + +-static int +-user_list(struct inode *inode, const char *name, int namelen, char *out) +-{ +- int len = namelen; + if (!reiserfs_xattrs_user(inode->i_sb)) + return 0; +- +- if (out) +- memcpy(out, name, len); +- ++ if (list && len <= list_size) { ++ memcpy(list, name, name_len); ++ list[name_len] = '\0'; ++ } + return len; + } + +-struct reiserfs_xattr_handler user_handler = { ++struct xattr_handler reiserfs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .get = user_get, + .set = user_set, +- .del = user_del, + .list = user_list, + }; +--- a/include/linux/reiserfs_acl.h ++++ b/include/linux/reiserfs_acl.h +@@ -52,10 +52,8 @@ int reiserfs_acl_chmod(struct inode *ino + int reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry, + struct inode *inode); + int 
reiserfs_cache_default_acl(struct inode *dir); +-extern int reiserfs_xattr_posix_acl_init(void) __init; +-extern int reiserfs_xattr_posix_acl_exit(void); +-extern struct reiserfs_xattr_handler posix_acl_default_handler; +-extern struct reiserfs_xattr_handler posix_acl_access_handler; ++extern struct xattr_handler reiserfs_posix_acl_default_handler; ++extern struct xattr_handler reiserfs_posix_acl_access_handler; + + static inline void reiserfs_init_acl_access(struct inode *inode) + { +@@ -75,16 +73,6 @@ static inline struct posix_acl *reiserfs + return NULL; + } + +-static inline int reiserfs_xattr_posix_acl_init(void) +-{ +- return 0; +-} +- +-static inline int reiserfs_xattr_posix_acl_exit(void) +-{ +- return 0; +-} +- + static inline int reiserfs_acl_chmod(struct inode *inode) + { + return 0; +--- a/include/linux/reiserfs_fs_sb.h ++++ b/include/linux/reiserfs_fs_sb.h +@@ -450,7 +450,6 @@ enum reiserfs_mount_options { + REISERFS_NO_UNHASHED_RELOCATION, + REISERFS_HASHED_RELOCATION, + REISERFS_ATTRS, +- REISERFS_XATTRS, + REISERFS_XATTRS_USER, + REISERFS_POSIXACL, + REISERFS_BARRIER_NONE, +@@ -488,7 +487,7 @@ enum reiserfs_mount_options { + #define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG)) + #define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED)) + #define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) +-#define reiserfs_xattrs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS)) ++#define reiserfs_xattrs(s) ((s)->s_xattr != NULL) + #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) + #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) + #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) +--- a/include/linux/reiserfs_xattr.h ++++ b/include/linux/reiserfs_xattr.h +@@ -29,20 +29,6 @@ struct iattr; + struct super_block; + struct nameidata; 
+ +-struct reiserfs_xattr_handler { +- char *prefix; +- int (*init) (void); +- void (*exit) (void); +- int (*get) (struct inode * inode, const char *name, void *buffer, +- size_t size); +- int (*set) (struct inode * inode, const char *name, const void *buffer, +- size_t size, int flags); +- int (*del) (struct inode * inode, const char *name); +- int (*list) (struct inode * inode, const char *name, int namelen, +- char *out); +- struct list_head handlers; +-}; +- + int reiserfs_xattr_register_handlers(void) __init; + void reiserfs_xattr_unregister_handlers(void); + int reiserfs_xattr_init(struct super_block *sb, int mount_flags); +@@ -59,13 +45,14 @@ ssize_t reiserfs_listxattr(struct dentry + int reiserfs_removexattr(struct dentry *dentry, const char *name); + int reiserfs_permission(struct inode *inode, int mask); + +-int reiserfs_xattr_del(struct inode *, const char *); +-int reiserfs_xattr_get(const struct inode *, const char *, void *, size_t); ++int reiserfs_xattr_get(struct inode *, const char *, void *, size_t); ++int __reiserfs_xattr_set(struct inode *, const char *, const void *, ++ size_t, int); + int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int); + +-extern struct reiserfs_xattr_handler user_handler; +-extern struct reiserfs_xattr_handler trusted_handler; +-extern struct reiserfs_xattr_handler security_handler; ++extern struct xattr_handler reiserfs_xattr_user_handler; ++extern struct xattr_handler reiserfs_xattr_trusted_handler; ++extern struct xattr_handler reiserfs_xattr_security_handler; + + static inline void reiserfs_init_xattr_rwsem(struct inode *inode) + { diff --git a/src/patches/suse-2.6.27.25/patches.suse/reiserfs-use-reiserfs_error.diff b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-use-reiserfs_error.diff new file mode 100644 index 0000000000..e2c3a04381 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/reiserfs-use-reiserfs_error.diff @@ -0,0 +1,512 @@ +From: Jeff Mahoney +Subject: reiserfs: use 
reiserfs_error() + + This patch makes many paths that are currently using warnings to handle + the error. + +Signed-off-by: Jeff Mahoney + +-- + + fs/reiserfs/bitmap.c | 56 +++++++++++++++++++++--------------------- + fs/reiserfs/inode.c | 45 +++++++++++++++------------------ + fs/reiserfs/lbalance.c | 20 +++++++-------- + fs/reiserfs/namei.c | 24 +++++++++--------- + fs/reiserfs/objectid.c | 4 +-- + fs/reiserfs/stree.c | 26 +++++++++---------- + fs/reiserfs/super.c | 15 +++++------ + fs/reiserfs/tail_conversion.c | 6 ++-- + fs/reiserfs/xattr.c | 21 +++++++-------- + 9 files changed, 107 insertions(+), 110 deletions(-) + +--- a/fs/reiserfs/bitmap.c ++++ b/fs/reiserfs/bitmap.c +@@ -64,9 +64,9 @@ int is_reusable(struct super_block *s, b + unsigned int bmap_count = reiserfs_bmap_count(s); + + if (block == 0 || block >= SB_BLOCK_COUNT(s)) { +- reiserfs_warning(s, "vs-4010", +- "block number is out of range %lu (%u)", +- block, SB_BLOCK_COUNT(s)); ++ reiserfs_error(s, "vs-4010", ++ "block number is out of range %lu (%u)", ++ block, SB_BLOCK_COUNT(s)); + return 0; + } + +@@ -79,30 +79,30 @@ int is_reusable(struct super_block *s, b + b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; + if (block >= bmap1 && + block <= bmap1 + bmap_count) { +- reiserfs_warning(s, "vs-4019", "bitmap block %lu(%u) " +- "can't be freed or reused", +- block, bmap_count); ++ reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) " ++ "can't be freed or reused", ++ block, bmap_count); + return 0; + } + } else { + if (offset == 0) { +- reiserfs_warning(s, "vs-4020", "bitmap block %lu(%u) " +- "can't be freed or reused", +- block, bmap_count); ++ reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) " ++ "can't be freed or reused", ++ block, bmap_count); + return 0; + } + } + + if (bmap >= bmap_count) { +- reiserfs_warning(s, "vs-4030", "bitmap for requested block " +- "is out of range: block=%lu, bitmap_nr=%u", +- block, bmap); ++ reiserfs_error(s, "vs-4030", "bitmap for requested block " ++ "is 
out of range: block=%lu, bitmap_nr=%u", ++ block, bmap); + return 0; + } + + if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) { +- reiserfs_warning(s, "vs-4050", "this is root block (%u), " +- "it must be busy", SB_ROOT_BLOCK(s)); ++ reiserfs_error(s, "vs-4050", "this is root block (%u), " ++ "it must be busy", SB_ROOT_BLOCK(s)); + return 0; + } + +@@ -153,8 +153,8 @@ static int scan_bitmap_block(struct reis + /* - I mean `a window of zero bits' as in description of this function - Zam. */ + + if (!bi) { +- reiserfs_warning(s, "jdm-4055", "NULL bitmap info pointer " +- "for bitmap %d", bmap_n); ++ reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer " ++ "for bitmap %d", bmap_n); + return 0; + } + +@@ -399,8 +399,8 @@ static void _reiserfs_free_block(struct + get_bit_address(s, block, &nr, &offset); + + if (nr >= reiserfs_bmap_count(s)) { +- reiserfs_warning(s, "vs-4075", "block %lu is out of range", +- block); ++ reiserfs_error(s, "vs-4075", "block %lu is out of range", ++ block); + return; + } + +@@ -412,8 +412,8 @@ static void _reiserfs_free_block(struct + + /* clear bit for the given block in bit map */ + if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) { +- reiserfs_warning(s, "vs-4080", +- "block %lu: bit already cleared", block); ++ reiserfs_error(s, "vs-4080", ++ "block %lu: bit already cleared", block); + } + apbi[nr].free_count++; + journal_mark_dirty(th, s, bmbh); +@@ -440,7 +440,7 @@ void reiserfs_free_block(struct reiserfs + return; + + if (block > sb_block_count(REISERFS_SB(s)->s_rs)) { +- reiserfs_panic(th->t_super, "bitmap-4072", ++ reiserfs_error(th->t_super, "bitmap-4072", + "Trying to free block outside file system " + "boundaries (%lu > %lu)", + block, sb_block_count(REISERFS_SB(s)->s_rs)); +@@ -472,8 +472,8 @@ static void __discard_prealloc(struct re + BUG_ON(!th->t_trans_id); + #ifdef CONFIG_REISERFS_CHECK + if (ei->i_prealloc_count < 0) +- reiserfs_warning(th->t_super, "zam-4001", +- "inode has negative prealloc blocks count."); 
++ reiserfs_error(th->t_super, "zam-4001", ++ "inode has negative prealloc blocks count."); + #endif + while (ei->i_prealloc_count > 0) { + reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); +@@ -509,9 +509,9 @@ void reiserfs_discard_all_prealloc(struc + i_prealloc_list); + #ifdef CONFIG_REISERFS_CHECK + if (!ei->i_prealloc_count) { +- reiserfs_warning(th->t_super, "zam-4001", +- "inode is in prealloc list but has " +- "no preallocated blocks."); ++ reiserfs_error(th->t_super, "zam-4001", ++ "inode is in prealloc list but has " ++ "no preallocated blocks."); + } + #endif + __discard_prealloc(th, ei); +@@ -1214,7 +1214,9 @@ void reiserfs_cache_bitmap_metadata(stru + unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size); + + /* The first bit must ALWAYS be 1 */ +- BUG_ON(!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data)); ++ if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data)) ++ reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is " ++ "corrupted: first bit must be 1", bh->b_blocknr); + + info->free_count = 0; + +--- a/fs/reiserfs/inode.c ++++ b/fs/reiserfs/inode.c +@@ -841,12 +841,12 @@ int reiserfs_get_block(struct inode *ino + tail_offset); + if (retval) { + if (retval != -ENOSPC) +- reiserfs_warning(inode->i_sb, +- "clm-6004", +- "convert tail failed " +- "inode %lu, error %d", +- inode->i_ino, +- retval); ++ reiserfs_error(inode->i_sb, ++ "clm-6004", ++ "convert tail failed " ++ "inode %lu, error %d", ++ inode->i_ino, ++ retval); + if (allocated_block_nr) { + /* the bitmap, the super, and the stat data == 3 */ + if (!th) +@@ -1332,10 +1332,9 @@ void reiserfs_update_sd_size(struct reis + /* look for the object's stat data */ + retval = search_item(inode->i_sb, &key, &path); + if (retval == IO_ERROR) { +- reiserfs_warning(inode->i_sb, "vs-13050", +- "i/o failure occurred trying to " +- "update %K stat data", +- &key); ++ reiserfs_error(inode->i_sb, "vs-13050", ++ "i/o failure occurred trying to " ++ "update %K stat data", 
&key); + return; + } + if (retval == ITEM_NOT_FOUND) { +@@ -1424,9 +1423,9 @@ void reiserfs_read_locked_inode(struct i + /* look for the object's stat data */ + retval = search_item(inode->i_sb, &key, &path_to_sd); + if (retval == IO_ERROR) { +- reiserfs_warning(inode->i_sb, "vs-13070", +- "i/o failure occurred trying to find " +- "stat data of %K", &key); ++ reiserfs_error(inode->i_sb, "vs-13070", ++ "i/o failure occurred trying to find " ++ "stat data of %K", &key); + reiserfs_make_bad_inode(inode); + return; + } +@@ -1687,8 +1686,8 @@ static int reiserfs_new_directory(struct + /* look for place in the tree for new item */ + retval = search_item(sb, &key, path); + if (retval == IO_ERROR) { +- reiserfs_warning(sb, "vs-13080", +- "i/o failure occurred creating new directory"); ++ reiserfs_error(sb, "vs-13080", ++ "i/o failure occurred creating new directory"); + return -EIO; + } + if (retval == ITEM_FOUND) { +@@ -1727,8 +1726,8 @@ static int reiserfs_new_symlink(struct r + /* look for place in the tree for new item */ + retval = search_item(sb, &key, path); + if (retval == IO_ERROR) { +- reiserfs_warning(sb, "vs-13080", +- "i/o failure occurred creating new symlink"); ++ reiserfs_error(sb, "vs-13080", ++ "i/o failure occurred creating new symlink"); + return -EIO; + } + if (retval == ITEM_FOUND) { +@@ -2048,10 +2047,8 @@ static int grab_tail_page(struct inode * + ** I've screwed up the code to find the buffer, or the code to + ** call prepare_write + */ +- reiserfs_warning(p_s_inode->i_sb, "clm-6000", +- "error reading block %lu on dev %s", +- bh->b_blocknr, +- reiserfs_bdevname(p_s_inode->i_sb)); ++ reiserfs_error(p_s_inode->i_sb, "clm-6000", ++ "error reading block %lu", bh->b_blocknr); + error = -EIO; + goto unlock; + } +@@ -2093,9 +2090,9 @@ int reiserfs_truncate_file(struct inode + // and get_block_create_0 could not find a block to read in, + // which is ok. 
+ if (error != -ENOENT) +- reiserfs_warning(p_s_inode->i_sb, "clm-6001", +- "grab_tail_page failed %d", +- error); ++ reiserfs_error(p_s_inode->i_sb, "clm-6001", ++ "grab_tail_page failed %d", ++ error); + page = NULL; + bh = NULL; + } +--- a/fs/reiserfs/lbalance.c ++++ b/fs/reiserfs/lbalance.c +@@ -1291,17 +1291,17 @@ void leaf_paste_entries(struct buffer_in + prev = (i != 0) ? deh_location(&(deh[i - 1])) : 0; + + if (prev && prev <= deh_location(&(deh[i]))) +- reiserfs_warning(NULL, "vs-10240", +- "directory item (%h) " +- "corrupted (prev %a, " +- "cur(%d) %a)", +- ih, deh + i - 1, i, deh + i); ++ reiserfs_error(sb_from_bi(bi), "vs-10240", ++ "directory item (%h) " ++ "corrupted (prev %a, " ++ "cur(%d) %a)", ++ ih, deh + i - 1, i, deh + i); + if (next && next >= deh_location(&(deh[i]))) +- reiserfs_warning(NULL, "vs-10250", +- "directory item (%h) " +- "corrupted (cur(%d) %a, " +- "next %a)", +- ih, i, deh + i, deh + i + 1); ++ reiserfs_error(sb_from_bi(bi), "vs-10250", ++ "directory item (%h) " ++ "corrupted (cur(%d) %a, " ++ "next %a)", ++ ih, i, deh + i, deh + i + 1); + } + } + #endif +--- a/fs/reiserfs/namei.c ++++ b/fs/reiserfs/namei.c +@@ -120,8 +120,8 @@ int search_by_entry_key(struct super_blo + switch (retval) { + case ITEM_NOT_FOUND: + if (!PATH_LAST_POSITION(path)) { +- reiserfs_warning(sb, "vs-7000", "search_by_key " +- "returned item position == 0"); ++ reiserfs_error(sb, "vs-7000", "search_by_key " ++ "returned item position == 0"); + pathrelse(path); + return IO_ERROR; + } +@@ -135,7 +135,7 @@ int search_by_entry_key(struct super_blo + + default: + pathrelse(path); +- reiserfs_warning(sb, "vs-7002", "no path to here"); ++ reiserfs_error(sb, "vs-7002", "no path to here"); + return IO_ERROR; + } + +@@ -298,7 +298,7 @@ static int reiserfs_find_entry(struct in + search_by_entry_key(dir->i_sb, &key_to_search, + path_to_entry, de); + if (retval == IO_ERROR) { +- reiserfs_warning(dir->i_sb, "zam-7001", "io error"); ++ reiserfs_error(dir->i_sb, 
"zam-7001", "io error"); + return IO_ERROR; + } + +@@ -490,9 +490,9 @@ static int reiserfs_add_entry(struct rei + } + + if (retval != NAME_FOUND) { +- reiserfs_warning(dir->i_sb, "zam-7002", +- "reiserfs_find_entry() returned " +- "unexpected value (%d)", retval); ++ reiserfs_error(dir->i_sb, "zam-7002", ++ "reiserfs_find_entry() returned " ++ "unexpected value (%d)", retval); + } + + return -EEXIST; +@@ -902,9 +902,9 @@ static int reiserfs_rmdir(struct inode * + goto end_rmdir; + + if (inode->i_nlink != 2 && inode->i_nlink != 1) +- reiserfs_warning(inode->i_sb, "reiserfs-7040", +- "empty directory has nlink != 2 (%d)", +- inode->i_nlink); ++ reiserfs_error(inode->i_sb, "reiserfs-7040", ++ "empty directory has nlink != 2 (%d)", ++ inode->i_nlink); + + clear_nlink(inode); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; +@@ -1495,8 +1495,8 @@ static int reiserfs_rename(struct inode + if (reiserfs_cut_from_item + (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, + 0) < 0) +- reiserfs_warning(old_dir->i_sb, "vs-7060", +- "couldn't not cut old name. Fsck later?"); ++ reiserfs_error(old_dir->i_sb, "vs-7060", ++ "couldn't not cut old name. 
Fsck later?"); + + old_dir->i_size -= DEH_SIZE + old_de.de_entrylen; + +--- a/fs/reiserfs/objectid.c ++++ b/fs/reiserfs/objectid.c +@@ -159,8 +159,8 @@ void reiserfs_release_objectid(struct re + i += 2; + } + +- reiserfs_warning(s, "vs-15011", "tried to free free object id (%lu)", +- (long unsigned)objectid_to_release); ++ reiserfs_error(s, "vs-15011", "tried to free free object id (%lu)", ++ (long unsigned)objectid_to_release); + } + + int reiserfs_convert_objectid_map_v1(struct super_block *s) +--- a/fs/reiserfs/stree.c ++++ b/fs/reiserfs/stree.c +@@ -720,9 +720,9 @@ int search_by_key(struct super_block *p_ + // make sure, that the node contents look like a node of + // certain level + if (!is_tree_node(p_s_bh, expected_level)) { +- reiserfs_warning(p_s_sb, "vs-5150", +- "invalid format found in block %ld. " +- "Fsck?", p_s_bh->b_blocknr); ++ reiserfs_error(p_s_sb, "vs-5150", ++ "invalid format found in block %ld. " ++ "Fsck?", p_s_bh->b_blocknr); + pathrelse(p_s_search_path); + return IO_ERROR; + } +@@ -1336,9 +1336,9 @@ void reiserfs_delete_solid_item(struct r + while (1) { + retval = search_item(th->t_super, &cpu_key, &path); + if (retval == IO_ERROR) { +- reiserfs_warning(th->t_super, "vs-5350", +- "i/o failure occurred trying " +- "to delete %K", &cpu_key); ++ reiserfs_error(th->t_super, "vs-5350", ++ "i/o failure occurred trying " ++ "to delete %K", &cpu_key); + break; + } + if (retval != ITEM_FOUND) { +@@ -1737,7 +1737,7 @@ static void truncate_directory(struct re + { + BUG_ON(!th->t_trans_id); + if (inode->i_nlink) +- reiserfs_warning(inode->i_sb, "vs-5655", "link count != 0"); ++ reiserfs_error(inode->i_sb, "vs-5655", "link count != 0"); + + set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET); + set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY); +@@ -1790,16 +1790,16 @@ int reiserfs_do_truncate(struct reiserfs + search_for_position_by_key(p_s_inode->i_sb, &s_item_key, + &s_search_path); + if (retval == IO_ERROR) { +- 
reiserfs_warning(p_s_inode->i_sb, "vs-5657", +- "i/o failure occurred trying to truncate %K", +- &s_item_key); ++ reiserfs_error(p_s_inode->i_sb, "vs-5657", ++ "i/o failure occurred trying to truncate %K", ++ &s_item_key); + err = -EIO; + goto out; + } + if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { +- reiserfs_warning(p_s_inode->i_sb, "PAP-5660", +- "wrong result %d of search for %K", retval, +- &s_item_key); ++ reiserfs_error(p_s_inode->i_sb, "PAP-5660", ++ "wrong result %d of search for %K", retval, ++ &s_item_key); + + err = -EIO; + goto out; +--- a/fs/reiserfs/super.c ++++ b/fs/reiserfs/super.c +@@ -193,9 +193,8 @@ static int finish_unfinished(struct supe + while (!retval) { + retval = search_item(s, &max_cpu_key, &path); + if (retval != ITEM_NOT_FOUND) { +- reiserfs_warning(s, "vs-2140", +- "search_by_key returned %d", +- retval); ++ reiserfs_error(s, "vs-2140", ++ "search_by_key returned %d", retval); + break; + } + +@@ -376,9 +375,9 @@ void add_save_link(struct reiserfs_trans + retval = search_item(inode->i_sb, &key, &path); + if (retval != ITEM_NOT_FOUND) { + if (retval != -ENOSPC) +- reiserfs_warning(inode->i_sb, "vs-2100", +- "search_by_key (%K) returned %d", &key, +- retval); ++ reiserfs_error(inode->i_sb, "vs-2100", ++ "search_by_key (%K) returned %d", &key, ++ retval); + pathrelse(&path); + return; + } +@@ -391,8 +390,8 @@ void add_save_link(struct reiserfs_trans + reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); + if (retval) { + if (retval != -ENOSPC) +- reiserfs_warning(inode->i_sb, "vs-2120", +- "insert_item returned %d", retval); ++ reiserfs_error(inode->i_sb, "vs-2120", ++ "insert_item returned %d", retval); + } else { + if (truncate) + REISERFS_I(inode)->i_flags |= +--- a/fs/reiserfs/tail_conversion.c ++++ b/fs/reiserfs/tail_conversion.c +@@ -48,9 +48,9 @@ int direct2indirect(struct reiserfs_tran + + // FIXME: we could avoid this + if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) { +- 
reiserfs_warning(sb, "PAP-14030", +- "pasted or inserted byte exists in " +- "the tree %K. Use fsck to repair.", &end_key); ++ reiserfs_error(sb, "PAP-14030", ++ "pasted or inserted byte exists in " ++ "the tree %K. Use fsck to repair.", &end_key); + pathrelse(path); + return -EIO; + } +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -259,8 +259,8 @@ static int __xattr_readdir(struct inode + ih = de.de_ih; + + if (!is_direntry_le_ih(ih)) { +- reiserfs_warning(inode->i_sb, "jdm-20000", +- "not direntry %h", ih); ++ reiserfs_error(inode->i_sb, "jdm-20000", ++ "not direntry %h", ih); + break; + } + copy_item_head(&tmp_ih, ih); +@@ -653,15 +653,14 @@ __reiserfs_xattr_del(struct dentry *xadi + goto out_file; + + if (!is_reiserfs_priv_object(dentry->d_inode)) { +- reiserfs_warning(dir->i_sb, "jdm-20003", +- "OID %08x [%.*s/%.*s] doesn't have " +- "priv flag set [parent is %sset].", +- le32_to_cpu(INODE_PKEY(dentry->d_inode)-> +- k_objectid), xadir->d_name.len, +- xadir->d_name.name, namelen, name, +- is_reiserfs_priv_object(xadir-> +- d_inode) ? "" : +- "not "); ++ reiserfs_error(dir->i_sb, "jdm-20003", ++ "OID %08x [%.*s/%.*s] doesn't have " ++ "priv flag set [parent is %sset].", ++ le32_to_cpu(INODE_PKEY(dentry->d_inode)-> ++ k_objectid), xadir->d_name.len, ++ xadir->d_name.name, namelen, name, ++ is_reiserfs_priv_object(xadir->d_inode) ? "" : ++ "not "); + dput(dentry); + return -EIO; + } diff --git a/src/patches/suse-2.6.27.25/patches.suse/s390-Kerntypes.diff b/src/patches/suse-2.6.27.25/patches.suse/s390-Kerntypes.diff new file mode 100644 index 0000000000..6cad1eec92 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/s390-Kerntypes.diff @@ -0,0 +1,320 @@ +From: Michael Holzheu +Subject: [PATCH] Generate Kerntypes file +Patch-mainline: never +References: bnc #471422 + +Since dwarfextract doesn't produce a correct dwarf Kerntypes, +we produce it with the compiler again. 
+ + +Signed-off-by: Michael Holzheu +Acked-by: Bernhard Walle + +--- + arch/s390/boot/Makefile | 2 + arch/s390/boot/kerntypes.c | 289 +++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 290 insertions(+), 1 deletion(-) + +--- a/arch/s390/boot/Makefile ++++ b/arch/s390/boot/Makefile +@@ -8,7 +8,7 @@ COMPILE_VERSION := __linux_compile_versi + + EXTRA_CFLAGS := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I. + +-targets := image ++targets := image kerntypes.o + + $(obj)/image: vmlinux FORCE + $(call if_changed,objcopy) +--- /dev/null ++++ b/arch/s390/boot/kerntypes.c +@@ -0,0 +1,289 @@ ++/* ++ * kerntypes.c ++ * ++ * Dummy module that includes headers for all kernel types of interest. ++ * The kernel type information is used by the lcrash utility when ++ * analyzing system crash dumps or the live system. Using the type ++ * information for the running system, rather than kernel header files, ++ * makes for a more flexible and robust analysis tool. ++ * ++ * This source code is released under the GNU GPL. 
++ */ ++ ++/* generate version for this file */ ++typedef char *COMPILE_VERSION; ++ ++/* General linux types */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_SLUB ++ #include ++#else ++ #include ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include 
++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * s390 specific includes ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* channel subsystem driver */ ++#include "drivers/s390/cio/cio.h" ++#include "drivers/s390/cio/chsc.h" ++#include "drivers/s390/cio/css.h" ++#include "drivers/s390/cio/device.h" ++#include "drivers/s390/cio/chsc_sch.h" ++ ++/* dasd device driver */ ++#include "drivers/s390/block/dasd_int.h" ++#include "drivers/s390/block/dasd_diag.h" ++#include "drivers/s390/block/dasd_eckd.h" ++#include "drivers/s390/block/dasd_fba.h" ++ ++/* networking drivers */ ++#include "include/net/iucv/iucv.h" ++#include "drivers/s390/net/fsm.h" ++#include "drivers/s390/net/ctcm_main.h" ++#include "drivers/s390/net/ctcm_fsms.h" ++#include "drivers/s390/net/lcs.h" ++#include "drivers/s390/net/qeth_core.h" ++#include "drivers/s390/net/qeth_core_mpc.h" ++#include "drivers/s390/net/qeth_core_offl.h" ++#include "drivers/s390/net/qeth_l3.h" ++ ++/* zfcp device driver */ ++#include "drivers/s390/scsi/zfcp_def.h" ++#include "drivers/s390/scsi/zfcp_fsf.h" ++ ++/* crypto device driver */ ++#include "drivers/s390/crypto/ap_bus.h" ++#include "drivers/s390/crypto/zcrypt_api.h" ++#include "drivers/s390/crypto/zcrypt_cca_key.h" ++#include "drivers/s390/crypto/zcrypt_pcica.h" ++#include "drivers/s390/crypto/zcrypt_pcicc.h" ++#include "drivers/s390/crypto/zcrypt_pcixcc.h" ++#include "drivers/s390/crypto/zcrypt_cex2a.h" ++ ++/* sclp device driver */ ++#include "drivers/s390/char/sclp.h" ++#include "drivers/s390/char/sclp_rw.h" ++#include "drivers/s390/char/sclp_tty.h" ++ ++/* vmur device driver */ ++#include "drivers/s390/char/vmur.h" ++ ++/* qdio device driver */ ++#include "drivers/s390/cio/qdio.h" ++#include "drivers/s390/cio/qdio_thinint.c" 
++#include "drivers/s390/cio/qdio_perf.h" ++ ++/* ++ * include sched.c for types: ++ * - struct prio_array ++ * - struct runqueue ++ */ ++#include "kernel/sched.c" ++/* ++ * include slab.c for struct kmem_cache ++ */ ++#ifdef CONFIG_SLUB ++ #include "mm/slub.c" ++#else ++ #include "mm/slab.c" ++#endif ++ ++/* include driver core private structures */ ++#include "drivers/base/base.h" diff --git a/src/patches/suse-2.6.27.25/patches.suse/s390-System.map.diff b/src/patches/suse-2.6.27.25/patches.suse/s390-System.map.diff new file mode 100644 index 0000000000..93b954610c --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/s390-System.map.diff @@ -0,0 +1,30 @@ +From: Bernhard Walle +Subject: [PATCH] Strip L2^B symbols +Patch-mainline: never +References: bnc #456682 + +This patches strips all L2^B symbols that happen on s390 only from System.map. +We don't need that symbols as this are local labels. It confuses (older) +versions of crash and just makes System.map larger. + +The proper fix needs to be in binutils. However, since the binutils maintainer +at SUSE is not cooperative I workarounded this in the kernel. The proper +binutils patch is already mainline [1]. + + +Signed-off-by: Bernhard Walle + +[1] http://article.gmane.org/gmane.comp.gnu.binutils.cvs/12731 +--- + scripts/mksysmap | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/scripts/mksysmap ++++ b/scripts/mksysmap +@@ -41,5 +41,5 @@ + # so we just ignore them to let readprofile continue to work. + # (At least sparc64 has __crc_ in the middle). 
+ +-$NM -n $1 | grep -v '\( [aNUw] \)\|\(__crc_\)\|\( \$[adt]\)' > $2 ++$NM -n $1 | grep -v '\( [aNUw] \)\|\(__crc_\)\|\( \$[adt]\)\|\(L2\)' > $2 + diff --git a/src/patches/suse-2.6.27.25/patches.suse/usb_correct_config_ti_04b3_4543.diff b/src/patches/suse-2.6.27.25/patches.suse/usb_correct_config_ti_04b3_4543.diff new file mode 100644 index 0000000000..c6c603876b --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.suse/usb_correct_config_ti_04b3_4543.diff @@ -0,0 +1,26 @@ +From: Petr Ostadal +Subject: fix ti_usb_3410_5052 driver for device 04b3:4543 +References: bnc#395775 + +Signed-off-by: Oliver Neukum + +--- + drivers/usb/serial/ti_usb_3410_5052.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/usb/serial/ti_usb_3410_5052.c ++++ b/drivers/usb/serial/ti_usb_3410_5052.c +@@ -464,9 +464,11 @@ static int ti_startup(struct usb_serial + goto free_tdev; + } + +- /* the second configuration must be set (in sysfs by hotplug script) */ ++ /* the second configuration must be set */ ++ printk(KERN_DEBUG"%s: bConfigurationValue: %x\n", __FUNCTION__, dev->actconfig->desc.bConfigurationValue); + if (dev->actconfig->desc.bConfigurationValue == TI_BOOT_CONFIG) { +- status = -ENODEV; ++ status = usb_driver_set_configuration(dev, TI_ACTIVE_CONFIG); ++ status = status ? status : -ENODEV; + goto free_tdev; + } + diff --git a/src/patches/suse-2.6.27.25/patches.trace/ftrace-framepointer.diff b/src/patches/suse-2.6.27.25/patches.trace/ftrace-framepointer.diff new file mode 100644 index 0000000000..d2d2fe4075 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.trace/ftrace-framepointer.diff @@ -0,0 +1,32 @@ +From: Jan Blunck +Subject: Select FRAME_POINTER only on SYSPROF_TRACER +Date: Wed Sep 24 10:32:16 CEST 2008 + +The only tracer that requires frame pointers is the sysprof trace. Since this +tracer copies the functionality of oprofile, it isn't required at all. 
+ +Signed-off-by: Jan Blunck +--- + kernel/trace/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +Index: b/kernel/trace/Kconfig +=================================================================== +--- a/kernel/trace/Kconfig ++++ b/kernel/trace/Kconfig +@@ -18,7 +18,6 @@ config TRACING + config FTRACE + bool "Kernel Function Tracer" + depends on HAVE_FTRACE +- select FRAME_POINTER + select TRACING + select CONTEXT_SWITCH_TRACER + help +@@ -79,6 +78,7 @@ config SYSPROF_TRACER + bool "Sysprof Tracer" + depends on X86 + select TRACING ++ select FRAME_POINTER + help + This tracer provides the trace needed by the 'Sysprof' userspace + tool. diff --git a/src/patches/suse-2.6.27.25/patches.trace/s390-syscall-get-nr.diff b/src/patches/suse-2.6.27.25/patches.trace/s390-syscall-get-nr.diff new file mode 100644 index 0000000000..2d33f08575 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.trace/s390-syscall-get-nr.diff @@ -0,0 +1,281 @@ +Subject: [PATCH] fix syscall_get_nr. + +From: Martin Schwidefsky + +syscall_get_nr() currently returns a valid result only if the call +chain of the traced process includes do_syscall_trace_enter(). But +collect_syscall() can be called for any sleeping task, the result of +syscall_get_nr() in general is completely bogus. + +To make syscall_get_nr() work for any sleeping task the traps field +in pt_regs is replace with svcnr - the system call number the process +is executing. If svcnr == 0 the process is not on a system call path. 
+ +Signed-off-by: Martin Schwidefsky +Signed-off-by: Martin Schwidefsky +Acked-by: John Jolly +--- + + arch/s390/include/asm/ptrace.h | 2 +- + arch/s390/include/asm/syscall.h | 4 +--- + arch/s390/kernel/asm-offsets.c | 2 +- + arch/s390/kernel/compat_signal.c | 2 +- + arch/s390/kernel/entry.S | 21 +++++++++++---------- + arch/s390/kernel/entry64.S | 23 ++++++++++------------- + arch/s390/kernel/ptrace.c | 2 +- + arch/s390/kernel/signal.c | 6 +++--- + 8 files changed, 29 insertions(+), 33 deletions(-) + +--- a/arch/s390/include/asm/ptrace.h ++++ b/arch/s390/include/asm/ptrace.h +@@ -321,8 +321,8 @@ struct pt_regs + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned long orig_gpr2; ++ unsigned short svcnr; + unsigned short ilc; +- unsigned short trap; + }; + #endif + +--- a/arch/s390/include/asm/syscall.h ++++ b/arch/s390/include/asm/syscall.h +@@ -17,9 +17,7 @@ + static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) + { +- if (regs->trap != __LC_SVC_OLD_PSW) +- return -1; +- return regs->gprs[2]; ++ return regs->svcnr ? 
regs->svcnr : -1; + } + + static inline void syscall_rollback(struct task_struct *task, +--- a/arch/s390/kernel/asm-offsets.c ++++ b/arch/s390/kernel/asm-offsets.c +@@ -32,7 +32,7 @@ int main(void) + DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); + DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); + DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc)); +- DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap)); ++ DEFINE(__PT_SVCNR, offsetof(struct pt_regs, svcnr)); + DEFINE(__PT_SIZE, sizeof(struct pt_regs)); + BLANK(); + DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); +--- a/arch/s390/kernel/compat_signal.c ++++ b/arch/s390/kernel/compat_signal.c +@@ -340,7 +340,7 @@ static int restore_sigregs32(struct pt_r + return err; + + restore_fp_regs(¤t->thread.fp_regs); +- regs->trap = -1; /* disable syscall checks */ ++ regs->svcnr = 0; /* disable syscall checks */ + return 0; + } + +--- a/arch/s390/kernel/entry64.S ++++ b/arch/s390/kernel/entry64.S +@@ -46,7 +46,7 @@ SP_R14 = STACK_FRAME_OVERHEAD + __P + SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 120 + SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 + SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC +-SP_TRAP = STACK_FRAME_OVERHEAD + __PT_TRAP ++SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR + SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE + + STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER +@@ -168,11 +168,10 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_ + .macro CREATE_STACK_FRAME psworg,savearea + aghi %r15,-SP_SIZE # make room for registers & psw + mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack +- la %r12,\psworg + stg %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 +- icm %r12,12,__LC_SVC_ILC ++ icm %r12,3,__LC_SVC_ILC + stmg %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack +- st %r12,SP_ILC(%r15) ++ st %r12,SP_SVCNR(%r15) + mvc SP_R12(32,%r15),\savearea # move %r12-%r15 to stack + la %r12,0 + stg %r12,__SF_BACKCHAIN(%r15) +@@ -247,16 +246,17 @@ sysc_update: + #endif + sysc_do_svc: + lg 
%r9,__LC_THREAD_INFO # load pointer to thread_info struct +- slag %r7,%r7,2 # *4 and test for svc 0 ++ ltgr %r7,%r7 # test for svc 0 + jnz sysc_nr_ok + # svc 0: system call number in %r1 + cl %r1,BASED(.Lnr_syscalls) + jnl sysc_nr_ok + lgfr %r7,%r1 # clear high word in r1 +- slag %r7,%r7,2 # svc 0: system call number in %r1 + sysc_nr_ok: + mvc SP_ARGS(8,%r15),SP_R7(%r15) + sysc_do_restart: ++ sth %r7,SP_SVCNR(%r15) ++ sllg %r7,%r7,2 # svc number * 4 + larl %r10,sys_call_table + #ifdef CONFIG_COMPAT + tm __TI_flags+5(%r9),(_TIF_31BIT>>16) # running in 31 bit mode ? +@@ -360,7 +360,6 @@ sysc_notify_resume: + sysc_restart: + ni __TI_flags+7(%r9),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC + lg %r7,SP_R2(%r15) # load new svc number +- slag %r7,%r7,2 # *4 + mvc SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument + lmg %r2,%r6,SP_R2(%r15) # load svc arguments + j sysc_do_restart # restart svc +@@ -369,9 +368,8 @@ sysc_restart: + # _TIF_SINGLE_STEP is set, call do_single_step + # + sysc_singlestep: +- ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP +- lhi %r0,__LC_PGM_OLD_PSW +- sth %r0,SP_TRAP(%r15) # set trap indication to pgm check ++ ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP ++ xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + la %r2,SP_PTREGS(%r15) # address of register-save area + larl %r14,sysc_return # load adr. 
of system return + jg do_single_step # branch to do_sigtrap +@@ -389,7 +387,7 @@ sysc_tracesys: + lghi %r0,NR_syscalls + clgr %r0,%r2 + jnh sysc_tracenogo +- slag %r7,%r2,2 # *4 ++ sllg %r7,%r2,2 # svc number *4 + lgf %r8,0(%r7,%r10) + sysc_tracego: + lmg %r3,%r6,SP_R3(%r15) +@@ -564,8 +562,7 @@ pgm_svcper: + # per was called from kernel, must be kprobes + # + kernel_per: +- lhi %r0,__LC_PGM_OLD_PSW +- sth %r0,SP_TRAP(%r15) # set trap indication to pgm check ++ xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + la %r2,SP_PTREGS(%r15) # address of register-save area + larl %r14,sysc_restore # load adr. of system ret, no work + jg do_single_step # branch to do_single_step +--- a/arch/s390/kernel/entry.S ++++ b/arch/s390/kernel/entry.S +@@ -46,7 +46,7 @@ SP_R14 = STACK_FRAME_OVERHEAD + __P + SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 60 + SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 + SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC +-SP_TRAP = STACK_FRAME_OVERHEAD + __PT_TRAP ++SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR + SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE + + _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ +@@ -180,11 +180,10 @@ STACK_SIZE = 1 << STACK_SHIFT + .macro CREATE_STACK_FRAME psworg,savearea + s %r15,BASED(.Lc_spsize) # make room for registers & psw + mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack +- la %r12,\psworg + st %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 +- icm %r12,12,__LC_SVC_ILC ++ icm %r12,3,__LC_SVC_ILC + stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack +- st %r12,SP_ILC(%r15) ++ st %r12,SP_SVCNR(%r15) + mvc SP_R12(16,%r15),\savearea # move %r12-%r15 to stack + la %r12,0 + st %r12,__SF_BACKCHAIN(%r15) # clear back chain +@@ -261,16 +260,17 @@ sysc_update: + #endif + sysc_do_svc: + l %r9,__LC_THREAD_INFO # load pointer to thread_info struct +- sla %r7,2 # *4 and test for svc 0 ++ ltr %r7,%r7 # test for svc 0 + bnz BASED(sysc_nr_ok) # svc number > 0 + # svc 0: system 
call number in %r1 + cl %r1,BASED(.Lnr_syscalls) + bnl BASED(sysc_nr_ok) + lr %r7,%r1 # copy svc number to %r7 +- sla %r7,2 # *4 + sysc_nr_ok: + mvc SP_ARGS(4,%r15),SP_R7(%r15) + sysc_do_restart: ++ sth %r7,SP_SVCNR(%r15) ++ sll %r7,2 # svc number *4 + l %r8,BASED(.Lsysc_table) + tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) + l %r8,0(%r7,%r8) # get system call addr. +@@ -373,7 +373,6 @@ sysc_notify_resume: + sysc_restart: + ni __TI_flags+3(%r9),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC + l %r7,SP_R2(%r15) # load new svc number +- sla %r7,2 + mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument + lm %r2,%r6,SP_R2(%r15) # load svc arguments + b BASED(sysc_do_restart) # restart svc +@@ -383,7 +382,8 @@ sysc_restart: + # + sysc_singlestep: + ni __TI_flags+3(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP +- mvi SP_TRAP+1(%r15),0x28 # set trap indication to pgm check ++ mvi SP_SVCNR(%r15),0xff # set trap indication to pgm check ++ mvi SP_SVCNR+1(%r15),0xff + la %r2,SP_PTREGS(%r15) # address of register-save area + l %r1,BASED(.Lhandle_per) # load adr. of per handler + la %r14,BASED(sysc_return) # load adr. of system return +@@ -404,7 +404,7 @@ sysc_tracesys: + bnl BASED(sysc_tracenogo) + l %r8,BASED(.Lsysc_table) + lr %r7,%r2 +- sll %r7,2 # *4 ++ sll %r7,2 # svc number *4 + l %r8,0(%r7,%r8) + sysc_tracego: + lm %r3,%r6,SP_R3(%r15) +@@ -583,7 +583,8 @@ pgm_svcper: + # per was called from kernel, must be kprobes + # + kernel_per: +- mvi SP_TRAP+1(%r15),0x28 # set trap indication to pgm check ++ mvi SP_SVCNR(%r15),0xff # set trap indication to pgm check ++ mvi SP_SVCNR+1(%r15),0xff + la %r2,SP_PTREGS(%r15) # address of register-save area + l %r1,BASED(.Lhandle_per) # load adr. of per handler + la %r14,BASED(sysc_restore)# load adr. of system return +--- a/arch/s390/kernel/ptrace.c ++++ b/arch/s390/kernel/ptrace.c +@@ -671,7 +671,7 @@ asmlinkage long do_syscall_trace_enter(s + * debugger stored an invalid system call number. 
Skip + * the system call and the system call restart handling. + */ +- regs->trap = -1; ++ regs->svcnr = 0; + ret = -1; + } + +--- a/arch/s390/kernel/signal.c ++++ b/arch/s390/kernel/signal.c +@@ -157,7 +157,7 @@ static int restore_sigregs(struct pt_reg + current->thread.fp_regs.fpc &= FPC_VALID_MASK; + + restore_fp_regs(¤t->thread.fp_regs); +- regs->trap = -1; /* disable syscall checks */ ++ regs->svcnr = 0; /* disable syscall checks */ + return 0; + } + +@@ -442,7 +442,7 @@ void do_signal(struct pt_regs *regs) + oldset = ¤t->blocked; + + /* Are we from a system call? */ +- if (regs->trap == __LC_SVC_OLD_PSW) { ++ if (regs->svcnr) { + continue_addr = regs->psw.addr; + restart_addr = continue_addr - regs->ilc; + retval = regs->gprs[2]; +@@ -459,7 +459,7 @@ void do_signal(struct pt_regs *regs) + case -ERESTART_RESTARTBLOCK: + regs->gprs[2] = -EINTR; + } +- regs->trap = -1; /* Don't deal with this again. */ ++ regs->svcnr = 0; /* Don't deal with this again. */ + } + + /* Get signal to deliver. 
When running under ptrace, at this point diff --git a/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-arch-i386.diff b/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-arch-i386.diff new file mode 100644 index 0000000000..4ac9419486 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-arch-i386.diff @@ -0,0 +1,280 @@ +Subject: xen3 arch-i386 +From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd) +Patch-mainline: obsolete +Acked-by: jbeulich@novell.com + +Index: head-2008-11-25/arch/x86/kernel/asm-offsets_32.c +=================================================================== +--- head-2008-11-25.orig/arch/x86/kernel/asm-offsets_32.c 2008-11-25 12:33:06.000000000 +0100 ++++ head-2008-11-25/arch/x86/kernel/asm-offsets_32.c 2008-11-25 12:35:53.000000000 +0100 +@@ -91,9 +91,14 @@ void foo(void) + OFFSET(pbe_orig_address, pbe, orig_address); + OFFSET(pbe_next, pbe, next); + ++#ifndef CONFIG_X86_NO_TSS + /* Offset from the sysenter stack to tss.sp0 */ +- DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - ++ DEFINE(SYSENTER_stack_sp0, offsetof(struct tss_struct, x86_tss.sp0) - + sizeof(struct tss_struct)); ++#else ++ /* sysenter stack points directly to sp0 */ ++ DEFINE(SYSENTER_stack_sp0, 0); ++#endif + + DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); +Index: head-2008-11-25/arch/x86/kernel/entry_32.S +=================================================================== +--- head-2008-11-25.orig/arch/x86/kernel/entry_32.S 2008-11-25 12:33:06.000000000 +0100 ++++ head-2008-11-25/arch/x86/kernel/entry_32.S 2008-11-25 12:35:53.000000000 +0100 +@@ -293,7 +293,7 @@ ENTRY(ia32_sysenter_target) + CFI_SIGNAL_FRAME + CFI_DEF_CFA esp, 0 + CFI_REGISTER esp, ebp +- movl TSS_sysenter_sp0(%esp),%esp ++ movl SYSENTER_stack_sp0(%esp),%esp + sysenter_past_esp: + /* + * Interrupts are disabled here, but we can't trace it until +@@ -782,7 +782,7 @@ END(device_not_available) + * that sets up the real kernel 
stack. Check here, since we can't + * allow the wrong stack to be used. + * +- * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have ++ * "SYSENTER_stack_sp0+12" is because the NMI/debug handler will have + * already pushed 3 words if it hits on the sysenter instruction: + * eflags, cs and eip. + * +@@ -794,7 +794,7 @@ END(device_not_available) + cmpw $__KERNEL_CS,4(%esp); \ + jne ok; \ + label: \ +- movl TSS_sysenter_sp0+offset(%esp),%esp; \ ++ movl SYSENTER_stack_sp0+offset(%esp),%esp; \ + CFI_DEF_CFA esp, 0; \ + CFI_UNDEFINED eip; \ + pushfl; \ +Index: head-2008-11-25/arch/x86/kernel/machine_kexec_32.c +=================================================================== +--- head-2008-11-25.orig/arch/x86/kernel/machine_kexec_32.c 2008-11-17 13:38:03.000000000 +0100 ++++ head-2008-11-25/arch/x86/kernel/machine_kexec_32.c 2008-11-25 12:35:53.000000000 +0100 +@@ -25,6 +25,10 @@ + #include + #include + ++#ifdef CONFIG_XEN ++#include ++#endif ++ + #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) + static u32 kexec_pgd[1024] PAGE_ALIGNED; + #ifdef CONFIG_X86_PAE +@@ -34,6 +38,55 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED + static u32 kexec_pte0[1024] PAGE_ALIGNED; + static u32 kexec_pte1[1024] PAGE_ALIGNED; + ++#ifdef CONFIG_XEN ++ ++#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT) ++ ++#if PAGES_NR > KEXEC_XEN_NO_PAGES ++#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break ++#endif ++ ++#if PA_CONTROL_PAGE != 0 ++#error PA_CONTROL_PAGE is non zero - Xen support will break ++#endif ++ ++void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image) ++{ ++ void *control_page; ++ ++ memset(xki->page_list, 0, sizeof(xki->page_list)); ++ ++ control_page = page_address(image->control_code_page); ++ memcpy(control_page, relocate_kernel, PAGE_SIZE); ++ ++ xki->page_list[PA_CONTROL_PAGE] = __ma(control_page); ++ xki->page_list[PA_PGD] = __ma(kexec_pgd); ++#ifdef CONFIG_X86_PAE ++ 
xki->page_list[PA_PMD_0] = __ma(kexec_pmd0); ++ xki->page_list[PA_PMD_1] = __ma(kexec_pmd1); ++#endif ++ xki->page_list[PA_PTE_0] = __ma(kexec_pte0); ++ xki->page_list[PA_PTE_1] = __ma(kexec_pte1); ++ ++} ++ ++int __init machine_kexec_setup_resources(struct resource *hypervisor, ++ struct resource *phys_cpus, ++ int nr_phys_cpus) ++{ ++ int k; ++ ++ /* The per-cpu crash note resources belong to the hypervisor resource */ ++ for (k = 0; k < nr_phys_cpus; k++) ++ request_resource(hypervisor, phys_cpus + k); ++ ++ return 0; ++} ++ ++void machine_kexec_register_resources(struct resource *res) { ; } ++ ++#endif /* CONFIG_XEN */ ++ + /* + * A architecture hook called to validate the + * proposed image and prepare the control pages +@@ -64,6 +117,7 @@ void machine_kexec_cleanup(struct kimage + set_pages_nx(image->control_code_page, 1); + } + ++#ifndef CONFIG_XEN + /* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. 
+@@ -137,6 +191,7 @@ void machine_kexec(struct kimage *image) + + __ftrace_enabled_restore(save_ftrace_enabled); + } ++#endif + + void arch_crash_save_vmcoreinfo(void) + { +Index: head-2008-11-25/arch/x86/kernel/vm86_32.c +=================================================================== +--- head-2008-11-25.orig/arch/x86/kernel/vm86_32.c 2008-11-25 12:33:06.000000000 +0100 ++++ head-2008-11-25/arch/x86/kernel/vm86_32.c 2008-11-25 12:35:53.000000000 +0100 +@@ -124,7 +124,9 @@ static int copy_vm86_regs_from_user(stru + + struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) + { ++#ifndef CONFIG_X86_NO_TSS + struct tss_struct *tss; ++#endif + struct pt_regs *ret; + unsigned long tmp; + +@@ -147,12 +149,16 @@ struct pt_regs *save_v86_state(struct ke + do_exit(SIGSEGV); + } + ++#ifndef CONFIG_X86_NO_TSS + tss = &per_cpu(init_tss, get_cpu()); ++#endif + current->thread.sp0 = current->thread.saved_sp0; + current->thread.sysenter_cs = __KERNEL_CS; + load_sp0(tss, ¤t->thread); + current->thread.saved_sp0 = 0; ++#ifndef CONFIG_X86_NO_TSS + put_cpu(); ++#endif + + ret = KVM86->regs32; + +@@ -279,7 +285,9 @@ out: + + static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) + { ++#ifndef CONFIG_X86_NO_TSS + struct tss_struct *tss; ++#endif + /* + * make sure the vm86() system call doesn't try to do anything silly + */ +@@ -324,12 +332,16 @@ static void do_sys_vm86(struct kernel_vm + tsk->thread.saved_fs = info->regs32->fs; + savesegment(gs, tsk->thread.saved_gs); + ++#ifndef CONFIG_X86_NO_TSS + tss = &per_cpu(init_tss, get_cpu()); ++#endif + tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; + if (cpu_has_sep) + tsk->thread.sysenter_cs = 0; + load_sp0(tss, &tsk->thread); ++#ifndef CONFIG_X86_NO_TSS + put_cpu(); ++#endif + + tsk->thread.screen_bitmap = info->screen_bitmap; + if (info->flags & VM86_SCREEN_BITMAP) +Index: head-2008-11-25/arch/x86/power/cpu_32.c +=================================================================== +--- 
head-2008-11-25.orig/arch/x86/power/cpu_32.c 2008-11-25 12:33:06.000000000 +0100 ++++ head-2008-11-25/arch/x86/power/cpu_32.c 2008-11-25 12:35:53.000000000 +0100 +@@ -65,6 +65,7 @@ static void do_fpu_end(void) + + static void fix_processor_context(void) + { ++#ifndef CONFIG_X86_NO_TSS + int cpu = smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + +@@ -74,6 +75,7 @@ static void fix_processor_context(void) + * 386 hardware has concept of busy TSS or some + * similar stupidity. + */ ++#endif + + load_TR_desc(); /* This does ltr */ + load_LDT(¤t->active_mm->context); /* This does lldt */ +Index: head-2008-11-25/arch/x86/vdso/vdso32-setup.c +=================================================================== +--- head-2008-11-25.orig/arch/x86/vdso/vdso32-setup.c 2008-11-25 12:33:06.000000000 +0100 ++++ head-2008-11-25/arch/x86/vdso/vdso32-setup.c 2008-11-25 12:35:53.000000000 +0100 +@@ -26,6 +26,10 @@ + #include + #include + ++#ifdef CONFIG_XEN ++#include ++#endif ++ + enum { + VDSO_DISABLED = 0, + VDSO_ENABLED = 1, +@@ -225,6 +229,7 @@ static inline void map_compat_vdso(int m + + void enable_sep_cpu(void) + { ++#ifndef CONFIG_XEN + int cpu = get_cpu(); + struct tss_struct *tss = &per_cpu(init_tss, cpu); + +@@ -239,6 +244,35 @@ void enable_sep_cpu(void) + wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0); + wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0); + put_cpu(); ++#else ++ extern asmlinkage void ia32pv_sysenter_target(void); ++ static struct callback_register sysenter = { ++ .type = CALLBACKTYPE_sysenter, ++ .address = { __KERNEL_CS, (unsigned long)ia32pv_sysenter_target }, ++ }; ++ ++ if (!boot_cpu_has(X86_FEATURE_SEP)) ++ return; ++ ++ get_cpu(); ++ ++ if (xen_feature(XENFEAT_supervisor_mode_kernel)) ++ sysenter.address.eip = (unsigned long)ia32_sysenter_target; ++ ++ switch (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter)) { ++ case 0: ++ break; ++#if CONFIG_XEN_COMPAT < 0x030200 ++ case -ENOSYS: ++ 
sysenter.type = CALLBACKTYPE_sysenter_deprecated; ++ if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) == 0) ++ break; ++#endif ++ default: ++ clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability); ++ break; ++ } ++#endif + } + + static struct vm_area_struct gate_vma; diff --git a/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-arch-x86.diff b/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-arch-x86.diff new file mode 100644 index 0000000000..7942b0c675 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-arch-x86.diff @@ -0,0 +1,328 @@ +Subject: xen3 arch-x86 +From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd) +Patch-mainline: obsolete +Acked-by: jbeulich@novell.com + +List of files that don't require modification anymore (and hence +removed from this patch), for reference and in case upstream wants to +take the forward porting patches: +2.6.26/arch/x86/kernel/crash.c + +Index: head-2008-12-01/arch/x86/Makefile +=================================================================== +--- head-2008-12-01.orig/arch/x86/Makefile 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/arch/x86/Makefile 2008-12-01 11:11:08.000000000 +0100 +@@ -115,6 +115,10 @@ mcore-y := arch/x86/mach-default/ + mflags-$(CONFIG_X86_VOYAGER) := -Iinclude/asm-x86/mach-voyager + mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ + ++# Xen subarch support ++mflags-$(CONFIG_X86_XEN) := -Iinclude/asm-x86/mach-xen ++mcore-$(CONFIG_X86_XEN) := arch/x86/mach-xen/ ++ + # generic subarchitecture + mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic + fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ +@@ -183,9 +187,26 @@ drivers-$(CONFIG_KDB) += arch/x86/kdb/ + + boot := arch/x86/boot + +-PHONY += zImage bzImage compressed zlilo bzlilo \ ++PHONY += zImage bzImage vmlinuz compressed zlilo bzlilo \ + zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install + ++ifdef CONFIG_XEN ++CPPFLAGS := 
-D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \ ++ -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS) ++ ++ifdef CONFIG_X86_64 ++LDFLAGS_vmlinux := -e startup_64 ++endif ++ ++# Default kernel to build ++all: vmlinuz ++ ++# KBUILD_IMAGE specifies the target image being built ++KBUILD_IMAGE := $(boot)/vmlinuz ++ ++vmlinuz: vmlinux ++ $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) ++else + # Default kernel to build + all: bzImage + +@@ -208,6 +229,7 @@ zdisk bzdisk: vmlinux + + fdimage fdimage144 fdimage288 isoimage: vmlinux + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ ++endif + + install: + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install +Index: head-2008-12-01/arch/x86/boot/Makefile +=================================================================== +--- head-2008-12-01.orig/arch/x86/boot/Makefile 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/arch/x86/boot/Makefile 2008-12-01 11:11:08.000000000 +0100 +@@ -25,7 +25,7 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA + + #RAMDISK := -DRAMDISK=512 + +-targets := vmlinux.bin setup.bin setup.elf zImage bzImage ++targets := vmlinux.bin setup.bin setup.elf zImage bzImage vmlinuz vmlinux-stripped + subdir- := compressed + + setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o +@@ -190,5 +190,13 @@ zlilo: $(BOOTIMAGE) + cp System.map $(INSTALL_PATH)/ + if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi + ++$(obj)/vmlinuz: $(obj)/vmlinux-stripped FORCE ++ $(call if_changed,gzip) ++ @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' ++ ++$(obj)/vmlinux-stripped: OBJCOPYFLAGS := -g --strip-unneeded ++$(obj)/vmlinux-stripped: vmlinux FORCE ++ $(call if_changed,objcopy) ++ + install: + sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" +Index: head-2008-12-01/arch/x86/kernel/Makefile +=================================================================== +--- head-2008-12-01.orig/arch/x86/kernel/Makefile 2008-12-01 
10:53:14.000000000 +0100 ++++ head-2008-12-01/arch/x86/kernel/Makefile 2008-12-01 11:14:33.000000000 +0100 +@@ -99,10 +99,13 @@ scx200-y += scx200_32.o + + obj-$(CONFIG_OLPC) += olpc.o + ++obj-$(CONFIG_X86_XEN) += fixup.o ++ + ### + # 64 bit specific files + ifeq ($(CONFIG_X86_64),y) + obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o ++ obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_xen_64.o + obj-y += uv_sysfs.o + obj-y += genx2apic_cluster.o + obj-y += genx2apic_phys.o +@@ -116,4 +119,10 @@ ifeq ($(CONFIG_X86_64),y) + obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o + + obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o ++ ++ time_64-$(CONFIG_XEN) += time_32.o ++ pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o + endif ++ ++disabled-obj-$(CONFIG_XEN) := i8259_$(BITS).o reboot.o smpboot_$(BITS).o ++%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := +Index: head-2008-12-01/arch/x86/kernel/acpi/Makefile +=================================================================== +--- head-2008-12-01.orig/arch/x86/kernel/acpi/Makefile 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/arch/x86/kernel/acpi/Makefile 2008-12-01 11:11:08.000000000 +0100 +@@ -5,6 +5,9 @@ obj-$(CONFIG_ACPI_SLEEP) += sleep.o wake + + ifneq ($(CONFIG_ACPI_PROCESSOR),) + obj-y += cstate.o processor.o ++ifneq ($(CONFIG_PROCESSOR_EXTERNAL_CONTROL),) ++obj-$(CONFIG_XEN) += processor_extcntl_xen.o ++endif + endif + + $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin +@@ -12,3 +15,4 @@ $(obj)/wakeup_rm.o: $(obj)/realmode/w + $(obj)/realmode/wakeup.bin: FORCE + $(Q)$(MAKE) $(build)=$(obj)/realmode + ++disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_$(BITS).o +Index: head-2008-12-01/arch/x86/kernel/acpi/boot.c +=================================================================== +--- head-2008-12-01.orig/arch/x86/kernel/acpi/boot.c 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/arch/x86/kernel/acpi/boot.c 2008-12-01 11:11:08.000000000 +0100 +@@ -130,8 +130,10 @@ char *__init 
__acpi_map_table(unsigned l + if (!phys || !size) + return NULL; + ++#ifndef CONFIG_XEN + if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT)) + return __va(phys); ++#endif + + offset = phys & (PAGE_SIZE - 1); + mapped_size = PAGE_SIZE - offset; +Index: head-2008-12-01/arch/x86/kernel/acpi/processor.c +=================================================================== +--- head-2008-12-01.orig/arch/x86/kernel/acpi/processor.c 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/arch/x86/kernel/acpi/processor.c 2008-12-01 11:11:08.000000000 +0100 +@@ -75,7 +75,18 @@ static void init_intel_pdc(struct acpi_p + /* Initialize _PDC data based on the CPU vendor */ + void arch_acpi_processor_init_pdc(struct acpi_processor *pr) + { ++#ifdef CONFIG_XEN ++ /* ++ * As a work-around, just use cpu0's cpuinfo for all processors. ++ * Further work is required to expose xen hypervisor interface of ++ * getting physical cpuinfo to dom0 kernel and then ++ * arch_acpi_processor_init_pdc can set _PDC parameters according ++ * to Xen's phys information. 
++ */ ++ struct cpuinfo_x86 *c = &boot_cpu_data; ++#else + struct cpuinfo_x86 *c = &cpu_data(pr->id); ++#endif + + pr->pdc = NULL; + if (c->x86_vendor == X86_VENDOR_INTEL) +Index: head-2008-12-01/arch/x86/kernel/cpu/mtrr/Makefile +=================================================================== +--- head-2008-12-01.orig/arch/x86/kernel/cpu/mtrr/Makefile 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/arch/x86/kernel/cpu/mtrr/Makefile 2008-12-01 11:11:08.000000000 +0100 +@@ -1,3 +1,4 @@ + obj-y := main.o if.o generic.o state.o + obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o + ++obj-$(CONFIG_XEN) := main.o if.o +Index: head-2008-12-01/arch/x86/lib/Makefile +=================================================================== +--- head-2008-12-01.orig/arch/x86/lib/Makefile 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/arch/x86/lib/Makefile 2008-12-01 11:11:08.000000000 +0100 +@@ -25,3 +25,5 @@ else + lib-y += memmove_64.o memset_64.o + lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o + endif ++ ++lib-$(CONFIG_XEN_SCRUB_PAGES) += scrub.o +Index: head-2008-12-01/arch/x86/mm/Makefile +=================================================================== +--- head-2008-12-01.orig/arch/x86/mm/Makefile 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/arch/x86/mm/Makefile 2008-12-01 11:11:08.000000000 +0100 +@@ -21,4 +21,6 @@ obj-$(CONFIG_K8_NUMA) += k8topology_64. 
+ endif + obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o + ++obj-$(CONFIG_XEN) += hypervisor.o ++ + obj-$(CONFIG_MEMTEST) += memtest.o +Index: head-2008-12-01/arch/x86/oprofile/Makefile +=================================================================== +--- head-2008-12-01.orig/arch/x86/oprofile/Makefile 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/arch/x86/oprofile/Makefile 2008-12-01 11:11:08.000000000 +0100 +@@ -6,7 +6,14 @@ DRIVER_OBJS = $(addprefix ../../../drive + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + ++ifdef CONFIG_XEN ++XENOPROF_COMMON_OBJS = $(addprefix ../../../drivers/xen/xenoprof/, \ ++ xenoprofile.o) ++oprofile-y := $(DRIVER_OBJS) \ ++ $(XENOPROF_COMMON_OBJS) xenoprof.o ++else + oprofile-y := $(DRIVER_OBJS) init.o backtrace.o + oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \ + op_model_ppro.o op_model_p4.o + oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o ++endif +Index: head-2008-12-01/arch/x86/pci/Makefile +=================================================================== +--- head-2008-12-01.orig/arch/x86/pci/Makefile 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/arch/x86/pci/Makefile 2008-12-01 11:11:08.000000000 +0100 +@@ -4,6 +4,9 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o + obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o + obj-$(CONFIG_PCI_DIRECT) += direct.o + obj-$(CONFIG_PCI_OLPC) += olpc.o ++# pcifront should be after mmconfig.o and direct.o as it should only ++# take over if direct access to the PCI bus is unavailable ++obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront.o + + obj-y += fixup.o + obj-$(CONFIG_ACPI) += acpi.o +Index: head-2008-12-01/include/asm-x86/acpi.h +=================================================================== +--- head-2008-12-01.orig/include/asm-x86/acpi.h 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/include/asm-x86/acpi.h 2008-12-01 11:11:08.000000000 +0100 +@@ -30,6 +30,10 @@ + #include + #include + ++#ifdef CONFIG_XEN 
++#include ++#endif ++ + #define COMPILER_DEPENDENT_INT64 long long + #define COMPILER_DEPENDENT_UINT64 unsigned long long + +@@ -124,6 +128,27 @@ extern unsigned long acpi_wakeup_address + /* early initialization routine */ + extern void acpi_reserve_bootmem(void); + ++#ifdef CONFIG_XEN ++static inline int acpi_notify_hypervisor_state(u8 sleep_state, ++ u32 pm1a_cnt_val, ++ u32 pm1b_cnt_val) ++{ ++ struct xen_platform_op op = { ++ .cmd = XENPF_enter_acpi_sleep, ++ .interface_version = XENPF_INTERFACE_VERSION, ++ .u = { ++ .enter_acpi_sleep = { ++ .pm1a_cnt_val = pm1a_cnt_val, ++ .pm1b_cnt_val = pm1b_cnt_val, ++ .sleep_state = sleep_state, ++ }, ++ }, ++ }; ++ ++ return HYPERVISOR_platform_op(&op); ++} ++#endif /* CONFIG_XEN */ ++ + /* + * Check if the CPU can handle C2 and deeper + */ +@@ -156,7 +181,9 @@ static inline void disable_acpi(void) { + + #endif /* !CONFIG_ACPI */ + ++#ifndef CONFIG_XEN + #define ARCH_HAS_POWER_INIT 1 ++#endif + + struct bootnode; + +Index: head-2008-12-01/include/asm-x86/apic.h +=================================================================== +--- head-2008-12-01.orig/include/asm-x86/apic.h 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/include/asm-x86/apic.h 2008-12-01 11:11:08.000000000 +0100 +@@ -12,7 +12,9 @@ + #include + #include + ++#ifndef CONFIG_XEN + #define ARCH_APICTIMER_STOPS_ON_C3 1 ++#endif + + /* + * Debugging macros +Index: head-2008-12-01/include/asm-x86/kexec.h +=================================================================== +--- head-2008-12-01.orig/include/asm-x86/kexec.h 2008-12-01 10:53:14.000000000 +0100 ++++ head-2008-12-01/include/asm-x86/kexec.h 2008-12-01 11:11:08.000000000 +0100 +@@ -170,6 +170,19 @@ relocate_kernel(unsigned long indirectio + unsigned long start_address) ATTRIB_NORET; + #endif + ++/* Under Xen we need to work with machine addresses. 
These macros give the ++ * machine address of a certain page to the generic kexec code instead of ++ * the pseudo physical address which would be given by the default macros. ++ */ ++ ++#ifdef CONFIG_XEN ++#define KEXEC_ARCH_HAS_PAGE_MACROS ++#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page)) ++#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn)) ++#define kexec_virt_to_phys(addr) virt_to_machine(addr) ++#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr)) ++#endif ++ + #endif /* __ASSEMBLY__ */ + + #endif /* _KEXEC_H */ diff --git a/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-arch-x86_64.diff b/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-arch-x86_64.diff new file mode 100644 index 0000000000..ec035984d0 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-arch-x86_64.diff @@ -0,0 +1,248 @@ +Subject: xen3 arch-x86_64 +From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd) +Patch-mainline: obsolete +Acked-by: jbeulich@novell.com + +Index: head-2008-11-25/arch/x86/kernel/asm-offsets_64.c +=================================================================== +--- head-2008-11-25.orig/arch/x86/kernel/asm-offsets_64.c 2008-11-25 12:33:06.000000000 +0100 ++++ head-2008-11-25/arch/x86/kernel/asm-offsets_64.c 2008-11-25 12:35:54.000000000 +0100 +@@ -122,8 +122,10 @@ int main(void) + ENTRY(cr8); + BLANK(); + #undef ENTRY ++#ifndef CONFIG_X86_NO_TSS + DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist)); + BLANK(); ++#endif + DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); + BLANK(); + DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); +Index: head-2008-11-25/arch/x86/kernel/machine_kexec_64.c +=================================================================== +--- head-2008-11-25.orig/arch/x86/kernel/machine_kexec_64.c 2008-08-18 10:13:08.000000000 +0200 ++++ head-2008-11-25/arch/x86/kernel/machine_kexec_64.c 2008-11-25 12:35:54.000000000 +0100 +@@ -27,6 +27,119 @@ 
static u64 kexec_pud1[512] PAGE_ALIGNED; + static u64 kexec_pmd1[512] PAGE_ALIGNED; + static u64 kexec_pte1[512] PAGE_ALIGNED; + ++#ifdef CONFIG_XEN ++ ++/* In the case of Xen, override hypervisor functions to be able to create ++ * a regular identity mapping page table... ++ */ ++ ++#include ++#include ++ ++#define x__pmd(x) ((pmd_t) { (x) } ) ++#define x__pud(x) ((pud_t) { (x) } ) ++#define x__pgd(x) ((pgd_t) { (x) } ) ++ ++#define x_pmd_val(x) ((x).pmd) ++#define x_pud_val(x) ((x).pud) ++#define x_pgd_val(x) ((x).pgd) ++ ++static inline void x_set_pmd(pmd_t *dst, pmd_t val) ++{ ++ x_pmd_val(*dst) = x_pmd_val(val); ++} ++ ++static inline void x_set_pud(pud_t *dst, pud_t val) ++{ ++ x_pud_val(*dst) = phys_to_machine(x_pud_val(val)); ++} ++ ++static inline void x_pud_clear (pud_t *pud) ++{ ++ x_pud_val(*pud) = 0; ++} ++ ++static inline void x_set_pgd(pgd_t *dst, pgd_t val) ++{ ++ x_pgd_val(*dst) = phys_to_machine(x_pgd_val(val)); ++} ++ ++static inline void x_pgd_clear (pgd_t * pgd) ++{ ++ x_pgd_val(*pgd) = 0; ++} ++ ++#define X__PAGE_KERNEL_LARGE_EXEC \ ++ _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_PSE ++#define X_KERNPG_TABLE _PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY ++ ++#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT) ++ ++#if PAGES_NR > KEXEC_XEN_NO_PAGES ++#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break ++#endif ++ ++#if PA_CONTROL_PAGE != 0 ++#error PA_CONTROL_PAGE is non zero - Xen support will break ++#endif ++ ++void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image) ++{ ++ void *control_page; ++ void *table_page; ++ ++ memset(xki->page_list, 0, sizeof(xki->page_list)); ++ ++ control_page = page_address(image->control_code_page) + PAGE_SIZE; ++ memcpy(control_page, relocate_kernel, PAGE_SIZE); ++ ++ table_page = page_address(image->control_code_page); ++ ++ xki->page_list[PA_CONTROL_PAGE] = __ma(control_page); ++ xki->page_list[PA_TABLE_PAGE] = 
__ma(table_page); ++ ++ xki->page_list[PA_PGD] = __ma(kexec_pgd); ++ xki->page_list[PA_PUD_0] = __ma(kexec_pud0); ++ xki->page_list[PA_PUD_1] = __ma(kexec_pud1); ++ xki->page_list[PA_PMD_0] = __ma(kexec_pmd0); ++ xki->page_list[PA_PMD_1] = __ma(kexec_pmd1); ++ xki->page_list[PA_PTE_0] = __ma(kexec_pte0); ++ xki->page_list[PA_PTE_1] = __ma(kexec_pte1); ++} ++ ++int __init machine_kexec_setup_resources(struct resource *hypervisor, ++ struct resource *phys_cpus, ++ int nr_phys_cpus) ++{ ++ int k; ++ ++ /* The per-cpu crash note resources belong to the hypervisor resource */ ++ for (k = 0; k < nr_phys_cpus; k++) ++ request_resource(hypervisor, phys_cpus + k); ++ ++ return 0; ++} ++ ++void machine_kexec_register_resources(struct resource *res) { ; } ++ ++#else /* CONFIG_XEN */ ++ ++#define x__pmd(x) __pmd(x) ++#define x__pud(x) __pud(x) ++#define x__pgd(x) __pgd(x) ++ ++#define x_set_pmd(x, y) set_pmd(x, y) ++#define x_set_pud(x, y) set_pud(x, y) ++#define x_set_pgd(x, y) set_pgd(x, y) ++ ++#define x_pud_clear(x) pud_clear(x) ++#define x_pgd_clear(x) pgd_clear(x) ++ ++#define X__PAGE_KERNEL_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC ++#define X_KERNPG_TABLE _KERNPG_TABLE ++ ++#endif /* CONFIG_XEN */ ++ + static void init_level2_page(pmd_t *level2p, unsigned long addr) + { + unsigned long end_addr; +@@ -34,7 +147,7 @@ static void init_level2_page(pmd_t *leve + addr &= PAGE_MASK; + end_addr = addr + PUD_SIZE; + while (addr < end_addr) { +- set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); ++ x_set_pmd(level2p++, x__pmd(addr | X__PAGE_KERNEL_LARGE_EXEC)); + addr += PMD_SIZE; + } + } +@@ -59,12 +172,12 @@ static int init_level3_page(struct kimag + } + level2p = (pmd_t *)page_address(page); + init_level2_page(level2p, addr); +- set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE)); ++ x_set_pud(level3p++, x__pud(__pa(level2p) | X_KERNPG_TABLE)); + addr += PUD_SIZE; + } + /* clear the unused entries */ + while (addr < end_addr) { +- pud_clear(level3p++); ++ 
x_pud_clear(level3p++); + addr += PUD_SIZE; + } + out: +@@ -95,12 +208,12 @@ static int init_level4_page(struct kimag + if (result) { + goto out; + } +- set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); ++ x_set_pgd(level4p++, x__pgd(__pa(level3p) | X_KERNPG_TABLE)); + addr += PGDIR_SIZE; + } + /* clear the unused entries */ + while (addr < end_addr) { +- pgd_clear(level4p++); ++ x_pgd_clear(level4p++); + addr += PGDIR_SIZE; + } + out: +@@ -111,8 +224,14 @@ out: + static int init_pgtable(struct kimage *image, unsigned long start_pgtable) + { + pgd_t *level4p; ++ unsigned long x_max_pfn = max_pfn; ++ ++#ifdef CONFIG_XEN ++ x_max_pfn = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL); ++#endif ++ + level4p = (pgd_t *)__va(start_pgtable); +- return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); ++ return init_level4_page(image, level4p, 0, x_max_pfn << PAGE_SHIFT); + } + + int machine_kexec_prepare(struct kimage *image) +@@ -136,6 +255,7 @@ void machine_kexec_cleanup(struct kimage + return; + } + ++#ifndef CONFIG_XEN + /* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. 
+@@ -176,6 +296,7 @@ void machine_kexec(struct kimage *image) + relocate_kernel((unsigned long)image->head, (unsigned long)page_list, + image->start); + } ++#endif + + void arch_crash_save_vmcoreinfo(void) + { +Index: head-2008-11-25/arch/x86/power/cpu_64.c +=================================================================== +--- head-2008-11-25.orig/arch/x86/power/cpu_64.c 2008-11-25 12:33:06.000000000 +0100 ++++ head-2008-11-25/arch/x86/power/cpu_64.c 2008-11-25 12:35:54.000000000 +0100 +@@ -135,6 +135,7 @@ void restore_processor_state(void) + + static void fix_processor_context(void) + { ++#ifndef CONFIG_X86_NO_TSS + int cpu = smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + +@@ -146,6 +147,7 @@ static void fix_processor_context(void) + set_tss_desc(cpu, t); + + get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; ++#endif + + syscall_init(); /* This sets MSR_*STAR and related */ + load_TR_desc(); /* This does ltr */ +Index: head-2008-11-25/arch/x86/vdso/Makefile +=================================================================== +--- head-2008-11-25.orig/arch/x86/vdso/Makefile 2008-11-25 12:33:06.000000000 +0100 ++++ head-2008-11-25/arch/x86/vdso/Makefile 2008-11-25 12:35:54.000000000 +0100 +@@ -65,6 +65,8 @@ obj-$(VDSO32-y) += vdso32-syms.lds + vdso32.so-$(VDSO32-y) += int80 + vdso32.so-$(CONFIG_COMPAT) += syscall + vdso32.so-$(VDSO32-y) += sysenter ++xen-vdso32-$(subst 1,$(CONFIG_COMPAT),$(shell expr $(CONFIG_XEN_COMPAT)0 '<' 0x0302000)) += int80 ++vdso32.so-$(CONFIG_XEN) += $(xen-vdso32-y) + + vdso32-images = $(vdso32.so-y:%=vdso32-%.so) + diff --git a/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-common.diff b/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-common.diff new file mode 100644 index 0000000000..d771b84eed --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-common.diff @@ -0,0 +1,4189 @@ +Subject: xen3 common +From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd) +Patch-mainline: 
obsolete +Acked-by: jbeulich@novell.com + +List of files that don't require modification anymore (and hence +removed from this patch), for reference and in case upstream wants to +take the forward porting patches: +2.6.22/include/linux/sched.h +2.6.22/kernel/softlockup.c +2.6.22/kernel/timer.c +2.6.25/mm/highmem.c + +--- + drivers/Makefile | 1 + drivers/acpi/Makefile | 3 + drivers/acpi/hardware/hwsleep.c | 15 + drivers/acpi/processor_core.c | 72 +++ + drivers/acpi/processor_extcntl.c | 241 +++++++++++ + drivers/acpi/processor_idle.c | 24 - + drivers/acpi/processor_perflib.c | 21 + drivers/acpi/sleep/main.c | 9 + drivers/char/agp/intel-agp.c | 10 + drivers/char/mem.c | 16 + drivers/char/tpm/Makefile | 2 + drivers/char/tpm/tpm.h | 15 + drivers/char/tpm/tpm_vtpm.c | 542 +++++++++++++++++++++++++ + drivers/char/tpm/tpm_vtpm.h | 55 ++ + drivers/char/tpm/tpm_xen.c | 722 ++++++++++++++++++++++++++++++++++ + drivers/ide/ide-lib.c | 8 + drivers/oprofile/buffer_sync.c | 87 +++- + drivers/oprofile/cpu_buffer.c | 51 +- + drivers/oprofile/cpu_buffer.h | 9 + drivers/oprofile/event_buffer.h | 3 + drivers/oprofile/oprof.c | 30 + + drivers/oprofile/oprof.h | 3 + drivers/oprofile/oprofile_files.c | 201 +++++++++ + fs/aio.c | 119 +++++ + fs/compat_ioctl.c | 19 + include/acpi/processor.h | 143 ++++++ + include/asm-generic/pci.h | 2 + include/asm-generic/pgtable.h | 4 + include/linux/aio.h | 5 + include/linux/highmem.h | 8 + include/linux/interrupt.h | 6 + include/linux/kexec.h | 13 + include/linux/mm.h | 8 + include/linux/oprofile.h | 12 + include/linux/page-flags.h | 27 + + include/linux/pci.h | 12 + include/linux/skbuff.h | 8 + include/linux/vermagic.h | 7 + kernel/irq/spurious.c | 2 + kernel/kexec.c | 71 ++- + kernel/sysctl.c | 2 + mm/memory.c | 42 + + mm/mprotect.c | 2 + mm/page_alloc.c | 12 + net/core/dev.c | 62 ++ + net/core/skbuff.c | 4 + net/ipv4/netfilter/nf_nat_proto_tcp.c | 3 + net/ipv4/netfilter/nf_nat_proto_udp.c | 4 + net/ipv4/xfrm4_output.c | 2 + scripts/Makefile.build 
| 14 + scripts/Makefile.lib | 6 + 51 files changed, 2673 insertions(+), 86 deletions(-) + +Index: linux-2.6.27/drivers/Makefile +=================================================================== +--- linux-2.6.27.orig/drivers/Makefile ++++ linux-2.6.27/drivers/Makefile +@@ -37,6 +37,7 @@ obj-y += base/ block/ misc/ mfd/ net/ + obj-$(CONFIG_NUBUS) += nubus/ + obj-$(CONFIG_ATM) += atm/ + obj-y += macintosh/ ++obj-$(CONFIG_XEN) += xen/ + obj-$(CONFIG_SCSI) += scsi/ + obj-$(CONFIG_ATA) += ata/ + obj-$(CONFIG_IDE) += ide/ +Index: linux-2.6.27/drivers/acpi/Makefile +=================================================================== +--- linux-2.6.27.orig/drivers/acpi/Makefile ++++ linux-2.6.27/drivers/acpi/Makefile +@@ -34,6 +34,9 @@ processor-objs += processor_core.o proce + ifdef CONFIG_CPU_FREQ + processor-objs += processor_perflib.o + endif ++ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL ++processor-objs += processor_perflib.o processor_extcntl.o ++endif + + obj-y += sleep/ + obj-y += bus.o glue.o +Index: linux-2.6.27/drivers/acpi/hardware/hwsleep.c +=================================================================== +--- linux-2.6.27.orig/drivers/acpi/hardware/hwsleep.c ++++ linux-2.6.27/drivers/acpi/hardware/hwsleep.c +@@ -241,7 +241,11 @@ acpi_status asmlinkage acpi_enter_sleep_ + u32 PM1Bcontrol; + struct acpi_bit_register_info *sleep_type_reg_info; + struct acpi_bit_register_info *sleep_enable_reg_info; ++#if !(defined(CONFIG_XEN) && defined(CONFIG_X86)) + u32 in_value; ++#else ++ int err; ++#endif + struct acpi_object_list arg_list; + union acpi_object arg; + acpi_status status; +@@ -351,6 +355,7 @@ acpi_status asmlinkage acpi_enter_sleep_ + + ACPI_FLUSH_CPU_CACHE(); + ++#if !(defined(CONFIG_XEN) && defined(CONFIG_X86)) + status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL, + PM1Acontrol); + if (ACPI_FAILURE(status)) { +@@ -397,6 +402,16 @@ acpi_status asmlinkage acpi_enter_sleep_ + /* Spin until we wake */ + + } while (!in_value); ++#else ++ /* PV ACPI 
just need check hypercall return value */ ++ err = acpi_notify_hypervisor_state(sleep_state, ++ PM1Acontrol, PM1Bcontrol); ++ if (err) { ++ ACPI_DEBUG_PRINT((ACPI_DB_ERROR, ++ "Hypervisor failure [%d]\n", err)); ++ return_ACPI_STATUS(AE_ERROR); ++ } ++#endif + + return_ACPI_STATUS(AE_OK); + } +Index: linux-2.6.27/drivers/acpi/processor_core.c +=================================================================== +--- linux-2.6.27.orig/drivers/acpi/processor_core.c ++++ linux-2.6.27/drivers/acpi/processor_core.c +@@ -620,7 +620,8 @@ static int acpi_processor_get_info(struc + */ + if (pr->id == -1) { + if (ACPI_FAILURE +- (acpi_processor_hotadd_init(pr->handle, &pr->id))) { ++ (acpi_processor_hotadd_init(pr->handle, &pr->id)) && ++ !processor_cntl_external()) { + return -ENODEV; + } + } +@@ -662,7 +663,11 @@ static int acpi_processor_get_info(struc + return 0; + } + ++#ifndef CONFIG_XEN + static DEFINE_PER_CPU(void *, processor_device_array); ++#else ++static void *processor_device_array[NR_ACPI_CPUS]; ++#endif + + static int __cpuinit acpi_processor_start(struct acpi_device *device) + { +@@ -671,30 +676,46 @@ static int __cpuinit acpi_processor_star + struct acpi_processor *pr; + struct sys_device *sysdev; + ++ processor_extcntl_init(); ++ + pr = acpi_driver_data(device); + + result = acpi_processor_get_info(device); +- if (result) { ++ if (result || ++ ((pr->id == -1) && !processor_cntl_external())) { + /* Processor is physically not present */ + return 0; + } + +- BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0)); ++ BUG_ON(!processor_cntl_external() && ++ ((pr->id >= nr_cpu_ids) || (pr->id < 0))); + + /* + * Buggy BIOS check + * ACPI id of processors can be reported wrongly by the BIOS. 
+ * Don't trust it blindly + */ ++#ifndef CONFIG_XEN + if (per_cpu(processor_device_array, pr->id) != NULL && + per_cpu(processor_device_array, pr->id) != device) { ++#else ++ BUG_ON(pr->acpi_id >= NR_ACPI_CPUS); ++ if (processor_device_array[pr->acpi_id] != NULL && ++ processor_device_array[pr->acpi_id] != device) { ++#endif + printk(KERN_WARNING "BIOS reported wrong ACPI id " + "for the processor\n"); + return -ENODEV; + } ++#ifndef CONFIG_XEN + per_cpu(processor_device_array, pr->id) = device; + + per_cpu(processors, pr->id) = pr; ++#else ++ processor_device_array[pr->acpi_id] = device; ++ if (pr->id != -1) ++ per_cpu(processors, pr->id) = pr; ++#endif + + result = acpi_processor_add_fs(device); + if (result) +@@ -710,15 +731,28 @@ static int __cpuinit acpi_processor_star + /* _PDC call should be done before doing anything else (if reqd.). */ + arch_acpi_processor_init_pdc(pr); + acpi_processor_set_pdc(pr); +-#ifdef CONFIG_CPU_FREQ ++#if defined(CONFIG_CPU_FREQ) || defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL) + acpi_processor_ppc_has_changed(pr); + #endif +- acpi_processor_get_throttling_info(pr); +- acpi_processor_get_limit_info(pr); ++ ++ /* ++ * pr->id may equal to -1 while processor_cntl_external enabled. ++ * throttle and thermal module don't support this case. ++ * Tx only works when dom0 vcpu == pcpu num by far, as we give ++ * control to dom0. 
++ */ ++ if (pr->id != -1) { ++ acpi_processor_get_throttling_info(pr); ++ acpi_processor_get_limit_info(pr); ++ } + + + acpi_processor_power_init(pr, device); + ++ result = processor_extcntl_prepare(pr); ++ if (result) ++ goto end; ++ + pr->cdev = thermal_cooling_device_register("Processor", device, + &processor_cooling_ops); + if (IS_ERR(pr->cdev)) { +@@ -846,7 +880,7 @@ static int acpi_processor_remove(struct + + pr = acpi_driver_data(device); + +- if (pr->id >= nr_cpu_ids) { ++ if (!processor_cntl_external() && pr->id >= nr_cpu_ids) { + kfree(pr); + return 0; + } +@@ -872,8 +906,14 @@ static int acpi_processor_remove(struct + pr->cdev = NULL; + } + ++#ifndef CONFIG_XEN + per_cpu(processors, pr->id) = NULL; + per_cpu(processor_device_array, pr->id) = NULL; ++#else ++ if (pr->id != -1) ++ per_cpu(processors, pr->id) = NULL; ++ processor_device_array[pr->acpi_id] = NULL; ++#endif + kfree(pr); + + return 0; +@@ -933,6 +973,10 @@ int acpi_processor_device_add(acpi_handl + if (!pr) + return -ENODEV; + ++ if (processor_cntl_external()) ++ processor_notify_external(pr, ++ PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD); ++ + if ((pr->id >= 0) && (pr->id < nr_cpu_ids)) { + kobject_uevent(&(*device)->dev.kobj, KOBJ_ONLINE); + } +@@ -972,6 +1016,10 @@ static void __ref acpi_processor_hotplug + break; + } + ++ if (processor_cntl_external()) ++ processor_notify_external(pr, ++ PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD); ++ + if (pr->id >= 0 && (pr->id < nr_cpu_ids)) { + kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE); + break; +@@ -1003,6 +1051,11 @@ static void __ref acpi_processor_hotplug + + if ((pr->id < nr_cpu_ids) && (cpu_present(pr->id))) + kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE); ++ ++ if (processor_cntl_external()) ++ processor_notify_external(pr, PROCESSOR_HOTPLUG, ++ HOTPLUG_TYPE_REMOVE); ++ + break; + default: + ACPI_DEBUG_PRINT((ACPI_DB_INFO, +@@ -1067,6 +1120,11 @@ static acpi_status acpi_processor_hotadd + + static int acpi_processor_handle_eject(struct acpi_processor 
*pr) + { ++#ifdef CONFIG_XEN ++ if (pr->id == -1) ++ return (0); ++#endif ++ + if (cpu_online(pr->id)) + cpu_down(pr->id); + +Index: linux-2.6.27/drivers/acpi/processor_extcntl.c +=================================================================== +--- /dev/null ++++ linux-2.6.27/drivers/acpi/processor_extcntl.c +@@ -0,0 +1,241 @@ ++/* ++ * processor_extcntl.c - channel to external control logic ++ * ++ * Copyright (C) 2008, Intel corporation ++ * ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at ++ * your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, write to the Free Software Foundation, Inc., ++ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define ACPI_PROCESSOR_COMPONENT 0x01000000 ++#define ACPI_PROCESSOR_CLASS "processor" ++#define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver" ++#define _COMPONENT ACPI_PROCESSOR_COMPONENT ++ACPI_MODULE_NAME("acpi_processor") ++ ++static int processor_extcntl_parse_csd(struct acpi_processor *pr); ++static int processor_extcntl_get_performance(struct acpi_processor *pr); ++/* ++ * External processor control logic may register with its own set of ++ * ops to get ACPI related notification. One example is like VMM. 
++ */ ++const struct processor_extcntl_ops *processor_extcntl_ops; ++EXPORT_SYMBOL(processor_extcntl_ops); ++ ++static int processor_notify_smm(void) ++{ ++ acpi_status status; ++ static int is_done = 0; ++ ++ /* only need successfully notify BIOS once */ ++ /* avoid double notification which may lead to unexpected result */ ++ if (is_done) ++ return 0; ++ ++ /* Can't write pstate_cnt to smi_cmd if either value is zero */ ++ if ((!acpi_fadt.smi_cmd) || (!acpi_fadt.pstate_cnt)) { ++ ACPI_DEBUG_PRINT((ACPI_DB_INFO,"No SMI port or pstate_cnt\n")); ++ return 0; ++ } ++ ++ ACPI_DEBUG_PRINT((ACPI_DB_INFO, ++ "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n", ++ acpi_fadt.pstate_cnt, acpi_fadt.smi_cmd)); ++ ++ /* FADT v1 doesn't support pstate_cnt, many BIOS vendors use ++ * it anyway, so we need to support it... */ ++ if (acpi_fadt_is_v1) { ++ ACPI_DEBUG_PRINT((ACPI_DB_INFO, ++ "Using v1.0 FADT reserved value for pstate_cnt\n")); ++ } ++ ++ status = acpi_os_write_port(acpi_fadt.smi_cmd, ++ (u32) acpi_fadt.pstate_cnt, 8); ++ if (ACPI_FAILURE(status)) ++ return status; ++ ++ is_done = 1; ++ ++ return 0; ++} ++ ++int processor_notify_external(struct acpi_processor *pr, int event, int type) ++{ ++ int ret = -EINVAL; ++ ++ if (!processor_cntl_external()) ++ return -EINVAL; ++ ++ switch (event) { ++ case PROCESSOR_PM_INIT: ++ case PROCESSOR_PM_CHANGE: ++ if ((type >= PM_TYPE_MAX) || ++ !processor_extcntl_ops->pm_ops[type]) ++ break; ++ ++ ret = processor_extcntl_ops->pm_ops[type](pr, event); ++ break; ++ case PROCESSOR_HOTPLUG: ++ if (processor_extcntl_ops->hotplug) ++ ret = processor_extcntl_ops->hotplug(pr, type); ++ break; ++ default: ++ printk(KERN_ERR "Unsupport processor events %d.\n", event); ++ break; ++ } ++ ++ return ret; ++} ++ ++/* ++ * External control logic can decide to grab full or part of physical ++ * processor control bits. 
Take a VMM for example, physical processors ++ * are owned by VMM and thus existence information like hotplug is ++ * always required to be notified to VMM. Similar is processor idle ++ * state which is also necessarily controlled by VMM. But for other ++ * control bits like performance/throttle states, VMM may choose to ++ * control or not upon its own policy. ++ */ ++void processor_extcntl_init(void) ++{ ++ if (!processor_extcntl_ops) ++ arch_acpi_processor_init_extcntl(&processor_extcntl_ops); ++} ++ ++/* ++ * This is called from ACPI processor init, and targeted to hold ++ * some tricky housekeeping jobs to satisfy external control model. ++ * For example, we may put dependency parse stub here for idle ++ * and performance state. Those information may be not available ++ * if splitting from dom0 control logic like cpufreq driver. ++ */ ++int processor_extcntl_prepare(struct acpi_processor *pr) ++{ ++ /* parse cstate dependency information */ ++ if (processor_pm_external()) ++ processor_extcntl_parse_csd(pr); ++ ++ /* Initialize performance states */ ++ if (processor_pmperf_external()) ++ processor_extcntl_get_performance(pr); ++ ++ return 0; ++} ++ ++/* ++ * Currently no _CSD is implemented which is why existing ACPI code ++ * doesn't parse _CSD at all. But to keep interface complete with ++ * external control logic, we put a placeholder here for future ++ * compatibility. ++ */ ++static int processor_extcntl_parse_csd(struct acpi_processor *pr) ++{ ++ int i; ++ ++ for (i = 0; i < pr->power.count; i++) { ++ if (!pr->power.states[i].valid) ++ continue; ++ ++ /* No dependency by default */ ++ pr->power.states[i].domain_info = NULL; ++ pr->power.states[i].csd_count = 0; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Existing ACPI module does parse performance states at some point, ++ * when acpi-cpufreq driver is loaded which however is something ++ * we'd like to disable to avoid confliction with external control ++ * logic. 
So we have to collect raw performance information here ++ * when ACPI processor object is found and started. ++ */ ++static int processor_extcntl_get_performance(struct acpi_processor *pr) ++{ ++ int ret; ++ struct acpi_processor_performance *perf; ++ struct acpi_psd_package *pdomain; ++ ++ if (pr->performance) ++ return -EBUSY; ++ ++ perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL); ++ if (!perf) ++ return -ENOMEM; ++ ++ pr->performance = perf; ++ /* Get basic performance state information */ ++ ret = acpi_processor_get_performance_info(pr); ++ if (ret < 0) ++ goto err_out; ++ ++ /* ++ * Well, here we need retrieve performance dependency information ++ * from _PSD object. The reason why existing interface is not used ++ * is due to the reason that existing interface sticks to Linux cpu ++ * id to construct some bitmap, however we want to split ACPI ++ * processor objects from Linux cpu id logic. For example, even ++ * when Linux is configured as UP, we still want to parse all ACPI ++ * processor objects to external logic. In this case, it's preferred ++ * to use ACPI ID instead. ++ */ ++ pdomain = &pr->performance->domain_info; ++ pdomain->num_processors = 0; ++ ret = acpi_processor_get_psd(pr); ++ if (ret < 0) { ++ /* ++ * _PSD is optional - assume no coordination if absent (or ++ * broken), matching native kernels' behavior. 
++ */ ++ pdomain->num_entries = ACPI_PSD_REV0_ENTRIES; ++ pdomain->revision = ACPI_PSD_REV0_REVISION; ++ pdomain->domain = pr->acpi_id; ++ pdomain->coord_type = DOMAIN_COORD_TYPE_SW_ALL; ++ pdomain->num_processors = 1; ++ } ++ ++ /* Some sanity check */ ++ if ((pdomain->revision != ACPI_PSD_REV0_REVISION) || ++ (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) || ++ ((pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL) && ++ (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY) && ++ (pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL))) { ++ ret = -EINVAL; ++ goto err_out; ++ } ++ ++ /* Last step is to notify BIOS that external logic exists */ ++ processor_notify_smm(); ++ ++ processor_notify_external(pr, PROCESSOR_PM_INIT, PM_TYPE_PERF); ++ ++ return 0; ++err_out: ++ pr->performance = NULL; ++ kfree(perf); ++ return ret; ++} +Index: linux-2.6.27/drivers/acpi/processor_idle.c +=================================================================== +--- linux-2.6.27.orig/drivers/acpi/processor_idle.c ++++ linux-2.6.27/drivers/acpi/processor_idle.c +@@ -908,7 +908,8 @@ static int acpi_processor_get_power_info + */ + cx.entry_method = ACPI_CSTATE_HALT; + snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); +- } else { ++ /* This doesn't apply to external control case */ ++ } else if (!processor_pm_external()) { + continue; + } + if (cx.type == ACPI_STATE_C1 && +@@ -947,6 +948,12 @@ static int acpi_processor_get_power_info + + cx.power = obj->integer.value; + ++#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL ++ /* cache control methods to notify external logic */ ++ if (processor_pm_external()) ++ memcpy(&cx.reg, reg, sizeof(*reg)); ++#endif ++ + current_count++; + memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx)); + +@@ -1289,14 +1296,18 @@ int acpi_processor_cst_has_changed(struc + * been initialized. + */ + if (pm_idle_save) { +- pm_idle = pm_idle_save; ++ if (!processor_pm_external()) ++ pm_idle = pm_idle_save; + /* Relies on interrupts forcing exit from idle. 
*/ + synchronize_sched(); + } + + pr->flags.power = 0; + result = acpi_processor_get_power_info(pr); +- if ((pr->flags.power == 1) && (pr->flags.power_setup_done)) ++ if (processor_pm_external()) ++ processor_notify_external(pr, ++ PROCESSOR_PM_CHANGE, PM_TYPE_IDLE); ++ else if ((pr->flags.power == 1) && (pr->flags.power_setup_done)) + pm_idle = acpi_processor_idle; + + return result; +@@ -1821,7 +1832,7 @@ int __cpuinit acpi_processor_power_init( + printk(")\n"); + + #ifndef CONFIG_CPU_IDLE +- if (pr->id == 0) { ++ if (!processor_pm_external() && (pr->id == 0)) { + pm_idle_save = pm_idle; + pm_idle = acpi_processor_idle; + } +@@ -1835,6 +1846,11 @@ int __cpuinit acpi_processor_power_init( + acpi_driver_data(device)); + if (!entry) + return -EIO; ++ ++ if (processor_pm_external()) ++ processor_notify_external(pr, ++ PROCESSOR_PM_INIT, PM_TYPE_IDLE); ++ + return 0; + } + +Index: linux-2.6.27/drivers/acpi/processor_perflib.c +=================================================================== +--- linux-2.6.27.orig/drivers/acpi/processor_perflib.c ++++ linux-2.6.27/drivers/acpi/processor_perflib.c +@@ -80,6 +80,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the fre + + static int acpi_processor_ppc_status; + ++#ifdef CONFIG_CPU_FREQ + static int acpi_processor_ppc_notifier(struct notifier_block *nb, + unsigned long event, void *data) + { +@@ -122,6 +123,7 @@ static int acpi_processor_ppc_notifier(s + static struct notifier_block acpi_ppc_notifier_block = { + .notifier_call = acpi_processor_ppc_notifier, + }; ++#endif /* CONFIG_CPU_FREQ */ + + static int acpi_processor_get_platform_limit(struct acpi_processor *pr) + { +@@ -166,9 +168,15 @@ int acpi_processor_ppc_has_changed(struc + if (ret < 0) + return (ret); + else ++#ifdef CONFIG_CPU_FREQ + return cpufreq_update_policy(pr->id); ++#elif CONFIG_PROCESSOR_EXTERNAL_CONTROL ++ return processor_notify_external(pr, ++ PROCESSOR_PM_CHANGE, PM_TYPE_PERF); ++#endif + } + ++#ifdef CONFIG_CPU_FREQ + void acpi_processor_ppc_init(void) + 
{ + if (!cpufreq_register_notifier +@@ -187,6 +195,7 @@ void acpi_processor_ppc_exit(void) + + acpi_processor_ppc_status &= ~PPC_REGISTERED; + } ++#endif /* CONFIG_CPU_FREQ */ + + static int acpi_processor_get_performance_control(struct acpi_processor *pr) + { +@@ -328,7 +337,10 @@ static int acpi_processor_get_performanc + return result; + } + +-static int acpi_processor_get_performance_info(struct acpi_processor *pr) ++#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL ++static ++#endif ++int acpi_processor_get_performance_info(struct acpi_processor *pr) + { + int result = 0; + acpi_status status = AE_OK; +@@ -356,6 +368,7 @@ static int acpi_processor_get_performanc + return 0; + } + ++#ifdef CONFIG_CPU_FREQ + int acpi_processor_notify_smm(struct module *calling_module) + { + acpi_status status; +@@ -416,6 +429,7 @@ int acpi_processor_notify_smm(struct mod + } + + EXPORT_SYMBOL(acpi_processor_notify_smm); ++#endif /* CONFIG_CPU_FREQ */ + + #ifdef CONFIG_X86_ACPI_CPUFREQ_PROC_INTF + /* /proc/acpi/processor/../performance interface (DEPRECATED) */ +@@ -507,7 +521,10 @@ static void acpi_cpufreq_remove_file(str + } + #endif /* CONFIG_X86_ACPI_CPUFREQ_PROC_INTF */ + +-static int acpi_processor_get_psd(struct acpi_processor *pr) ++#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL ++static ++#endif ++int acpi_processor_get_psd(struct acpi_processor *pr) + { + int result = 0; + acpi_status status = AE_OK; +Index: linux-2.6.27/drivers/acpi/sleep/main.c +=================================================================== +--- linux-2.6.27.orig/drivers/acpi/sleep/main.c ++++ linux-2.6.27/drivers/acpi/sleep/main.c +@@ -27,6 +27,7 @@ u8 sleep_states[ACPI_S_STATE_COUNT]; + static int acpi_sleep_prepare(u32 acpi_state) + { + #ifdef CONFIG_ACPI_SLEEP ++#ifndef CONFIG_ACPI_PV_SLEEP + /* do we have a wakeup address for S2 and S3? 
*/ + if (acpi_state == ACPI_STATE_S3) { + if (!acpi_wakeup_address) { +@@ -36,6 +37,7 @@ static int acpi_sleep_prepare(u32 acpi_s + (acpi_physical_address)acpi_wakeup_address); + + } ++#endif + ACPI_FLUSH_CPU_CACHE(); + acpi_enable_wakeup_device_prep(acpi_state); + #endif +@@ -208,7 +210,14 @@ static int acpi_suspend_enter(suspend_st + break; + + case ACPI_STATE_S3: ++#ifdef CONFIG_ACPI_PV_SLEEP ++ /* Hyperviosr will save and restore CPU context ++ * and then we can skip low level housekeeping here. ++ */ ++ acpi_enter_sleep_state(acpi_state); ++#else + do_suspend_lowlevel(); ++#endif + break; + } + +Index: linux-2.6.27/drivers/char/agp/intel-agp.c +=================================================================== +--- linux-2.6.27.orig/drivers/char/agp/intel-agp.c ++++ linux-2.6.27/drivers/char/agp/intel-agp.c +@@ -250,6 +250,13 @@ static void *i8xx_alloc_pages(void) + if (page == NULL) + return NULL; + ++#ifdef CONFIG_XEN ++ if (xen_create_contiguous_region((unsigned long)page_address(page), 2, 32)) { ++ __free_pages(page, 2); ++ return NULL; ++ } ++#endif ++ + if (set_pages_uc(page, 4) < 0) { + set_pages_wb(page, 4); + __free_pages(page, 2); +@@ -269,6 +276,9 @@ static void i8xx_destroy_pages(void *add + + page = virt_to_page(addr); + set_pages_wb(page, 4); ++#ifdef CONFIG_XEN ++ xen_destroy_contiguous_region((unsigned long)page_address(page), 2); ++#endif + put_page(page); + __free_pages(page, 2); + atomic_dec(&agp_bridge->current_memory_agp); +Index: linux-2.6.27/drivers/char/mem.c +=================================================================== +--- linux-2.6.27.orig/drivers/char/mem.c ++++ linux-2.6.27/drivers/char/mem.c +@@ -110,6 +110,7 @@ void __attribute__((weak)) unxlate_dev_m + { + } + ++#ifndef ARCH_HAS_DEV_MEM + /* + * This funcion reads the *physical* memory. The f_pos points directly to the + * memory location. 
+@@ -254,6 +255,7 @@ static ssize_t write_mem(struct file * f + *ppos += written; + return written; + } ++#endif + + int __attribute__((weak)) phys_mem_access_prot_allowed(struct file *file, + unsigned long pfn, unsigned long size, pgprot_t *vma_prot) +@@ -372,6 +374,9 @@ static int mmap_mem(struct file * file, + static int mmap_kmem(struct file * file, struct vm_area_struct * vma) + { + unsigned long pfn; ++#ifdef CONFIG_XEN ++ unsigned long i, count; ++#endif + + /* Turn a kernel-virtual address into a physical page frame */ + pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT; +@@ -386,6 +391,13 @@ static int mmap_kmem(struct file * file, + if (!pfn_valid(pfn)) + return -EIO; + ++#ifdef CONFIG_XEN ++ count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; ++ for (i = 0; i < count; i++) ++ if ((pfn + i) != mfn_to_local_pfn(pfn_to_mfn(pfn + i))) ++ return -EIO; ++#endif ++ + vma->vm_pgoff = pfn; + return mmap_mem(file, vma); + } +@@ -905,6 +917,7 @@ static int open_port(struct inode * inod + #define open_kmem open_mem + #define open_oldmem open_mem + ++#ifndef ARCH_HAS_DEV_MEM + static const struct file_operations mem_fops = { + .llseek = memory_lseek, + .read = read_mem, +@@ -913,6 +926,9 @@ static const struct file_operations mem_ + .open = open_mem, + .get_unmapped_area = get_unmapped_area_mem, + }; ++#else ++extern const struct file_operations mem_fops; ++#endif + + #ifdef CONFIG_DEVKMEM + static const struct file_operations kmem_fops = { +Index: linux-2.6.27/drivers/char/tpm/Makefile +=================================================================== +--- linux-2.6.27.orig/drivers/char/tpm/Makefile ++++ linux-2.6.27/drivers/char/tpm/Makefile +@@ -9,3 +9,5 @@ obj-$(CONFIG_TCG_TIS) += tpm_tis.o + obj-$(CONFIG_TCG_NSC) += tpm_nsc.o + obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o + obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o ++obj-$(CONFIG_TCG_XEN) += tpm_xenu.o ++tpm_xenu-y = tpm_xen.o tpm_vtpm.o +Index: linux-2.6.27/drivers/char/tpm/tpm.h 
+=================================================================== +--- linux-2.6.27.orig/drivers/char/tpm/tpm.h ++++ linux-2.6.27/drivers/char/tpm/tpm.h +@@ -107,6 +107,9 @@ struct tpm_chip { + struct dentry **bios_dir; + + struct list_head list; ++#ifdef CONFIG_XEN ++ void *priv; ++#endif + void (*release) (struct device *); + }; + +@@ -124,6 +127,18 @@ static inline void tpm_write_index(int b + outb(value & 0xFF, base+1); + } + ++#ifdef CONFIG_XEN ++static inline void *chip_get_private(const struct tpm_chip *chip) ++{ ++ return chip->priv; ++} ++ ++static inline void chip_set_private(struct tpm_chip *chip, void *priv) ++{ ++ chip->priv = priv; ++} ++#endif ++ + extern void tpm_get_timeouts(struct tpm_chip *); + extern void tpm_gen_interrupt(struct tpm_chip *); + extern void tpm_continue_selftest(struct tpm_chip *); +Index: linux-2.6.27/drivers/char/tpm/tpm_vtpm.c +=================================================================== +--- /dev/null ++++ linux-2.6.27/drivers/char/tpm/tpm_vtpm.c +@@ -0,0 +1,542 @@ ++/* ++ * Copyright (C) 2006 IBM Corporation ++ * ++ * Authors: ++ * Stefan Berger ++ * ++ * Generic device driver part for device drivers in a virtualized ++ * environment. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "tpm.h" ++#include "tpm_vtpm.h" ++ ++/* read status bits */ ++enum { ++ STATUS_BUSY = 0x01, ++ STATUS_DATA_AVAIL = 0x02, ++ STATUS_READY = 0x04 ++}; ++ ++struct transmission { ++ struct list_head next; ++ ++ unsigned char *request; ++ size_t request_len; ++ size_t request_buflen; ++ ++ unsigned char *response; ++ size_t response_len; ++ size_t response_buflen; ++ ++ unsigned int flags; ++}; ++ ++enum { ++ TRANSMISSION_FLAG_WAS_QUEUED = 0x1 ++}; ++ ++ ++enum { ++ DATAEX_FLAG_QUEUED_ONLY = 0x1 ++}; ++ ++ ++/* local variables */ ++ ++/* local function prototypes */ ++static int _vtpm_send_queued(struct tpm_chip *chip); ++ ++ ++/* ============================================================= ++ * Some utility functions ++ * ============================================================= ++ */ ++static void vtpm_state_init(struct vtpm_state *vtpms) ++{ ++ vtpms->current_request = NULL; ++ spin_lock_init(&vtpms->req_list_lock); ++ init_waitqueue_head(&vtpms->req_wait_queue); ++ INIT_LIST_HEAD(&vtpms->queued_requests); ++ ++ vtpms->current_response = NULL; ++ spin_lock_init(&vtpms->resp_list_lock); ++ init_waitqueue_head(&vtpms->resp_wait_queue); ++ ++ vtpms->disconnect_time = jiffies; ++} ++ ++ ++static inline struct transmission *transmission_alloc(void) ++{ ++ return kzalloc(sizeof(struct transmission), GFP_ATOMIC); ++} ++ ++static unsigned char * ++transmission_set_req_buffer(struct transmission *t, ++ unsigned char *buffer, size_t len) ++{ ++ if (t->request_buflen < len) { ++ kfree(t->request); ++ t->request = kmalloc(len, GFP_KERNEL); ++ if (!t->request) { ++ t->request_buflen = 0; ++ return NULL; ++ } ++ t->request_buflen = len; ++ } ++ ++ memcpy(t->request, buffer, len); ++ t->request_len = len; ++ ++ return t->request; ++} ++ ++static unsigned char * ++transmission_set_res_buffer(struct transmission *t, ++ const unsigned char *buffer, size_t len) ++{ ++ if (t->response_buflen < len) { 
++ kfree(t->response); ++ t->response = kmalloc(len, GFP_ATOMIC); ++ if (!t->response) { ++ t->response_buflen = 0; ++ return NULL; ++ } ++ t->response_buflen = len; ++ } ++ ++ memcpy(t->response, buffer, len); ++ t->response_len = len; ++ ++ return t->response; ++} ++ ++static inline void transmission_free(struct transmission *t) ++{ ++ kfree(t->request); ++ kfree(t->response); ++ kfree(t); ++} ++ ++/* ============================================================= ++ * Interface with the lower layer driver ++ * ============================================================= ++ */ ++/* ++ * Lower layer uses this function to make a response available. ++ */ ++int vtpm_vd_recv(const struct tpm_chip *chip, ++ const unsigned char *buffer, size_t count, ++ void *ptr) ++{ ++ unsigned long flags; ++ int ret_size = 0; ++ struct transmission *t; ++ struct vtpm_state *vtpms; ++ ++ vtpms = (struct vtpm_state *)chip_get_private(chip); ++ ++ /* ++ * The list with requests must contain one request ++ * only and the element there must be the one that ++ * was passed to me from the front-end. 
++ */ ++ spin_lock_irqsave(&vtpms->resp_list_lock, flags); ++ if (vtpms->current_request != ptr) { ++ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); ++ return 0; ++ } ++ ++ if ((t = vtpms->current_request)) { ++ transmission_free(t); ++ vtpms->current_request = NULL; ++ } ++ ++ t = transmission_alloc(); ++ if (t) { ++ if (!transmission_set_res_buffer(t, buffer, count)) { ++ transmission_free(t); ++ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); ++ return -ENOMEM; ++ } ++ ret_size = count; ++ vtpms->current_response = t; ++ wake_up_interruptible(&vtpms->resp_wait_queue); ++ } ++ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); ++ ++ return ret_size; ++} ++ ++ ++/* ++ * Lower layer indicates its status (connected/disconnected) ++ */ ++void vtpm_vd_status(const struct tpm_chip *chip, u8 vd_status) ++{ ++ struct vtpm_state *vtpms; ++ ++ vtpms = (struct vtpm_state *)chip_get_private(chip); ++ ++ vtpms->vd_status = vd_status; ++ if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) { ++ vtpms->disconnect_time = jiffies; ++ } ++} ++ ++/* ============================================================= ++ * Interface with the generic TPM driver ++ * ============================================================= ++ */ ++static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) ++{ ++ int rc = 0; ++ unsigned long flags; ++ struct vtpm_state *vtpms; ++ ++ vtpms = (struct vtpm_state *)chip_get_private(chip); ++ ++ /* ++ * Check if the previous operation only queued the command ++ * In this case there won't be a response, so I just ++ * return from here and reset that flag. In any other ++ * case I should receive a response from the back-end. 
++ */ ++ spin_lock_irqsave(&vtpms->resp_list_lock, flags); ++ if ((vtpms->flags & DATAEX_FLAG_QUEUED_ONLY) != 0) { ++ vtpms->flags &= ~DATAEX_FLAG_QUEUED_ONLY; ++ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); ++ /* ++ * The first few commands (measurements) must be ++ * queued since it might not be possible to talk to the ++ * TPM, yet. ++ * Return a response of up to 30 '0's. ++ */ ++ ++ count = min_t(size_t, count, 30); ++ memset(buf, 0x0, count); ++ return count; ++ } ++ /* ++ * Check whether something is in the responselist and if ++ * there's nothing in the list wait for something to appear. ++ */ ++ ++ if (!vtpms->current_response) { ++ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); ++ interruptible_sleep_on_timeout(&vtpms->resp_wait_queue, ++ 1000); ++ spin_lock_irqsave(&vtpms->resp_list_lock ,flags); ++ } ++ ++ if (vtpms->current_response) { ++ struct transmission *t = vtpms->current_response; ++ vtpms->current_response = NULL; ++ rc = min(count, t->response_len); ++ memcpy(buf, t->response, rc); ++ transmission_free(t); ++ } ++ ++ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); ++ return rc; ++} ++ ++static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) ++{ ++ int rc = 0; ++ unsigned long flags; ++ struct transmission *t = transmission_alloc(); ++ struct vtpm_state *vtpms; ++ ++ vtpms = (struct vtpm_state *)chip_get_private(chip); ++ ++ if (!t) ++ return -ENOMEM; ++ /* ++ * If there's a current request, it must be the ++ * previous request that has timed out. 
++ */ ++ spin_lock_irqsave(&vtpms->req_list_lock, flags); ++ if (vtpms->current_request != NULL) { ++ printk("WARNING: Sending although there is a request outstanding.\n" ++ " Previous request must have timed out.\n"); ++ transmission_free(vtpms->current_request); ++ vtpms->current_request = NULL; ++ } ++ spin_unlock_irqrestore(&vtpms->req_list_lock, flags); ++ ++ /* ++ * Queue the packet if the driver below is not ++ * ready, yet, or there is any packet already ++ * in the queue. ++ * If the driver below is ready, unqueue all ++ * packets first before sending our current ++ * packet. ++ * For each unqueued packet, except for the ++ * last (=current) packet, call the function ++ * tpm_xen_recv to wait for the response to come ++ * back. ++ */ ++ if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) { ++ if (time_after(jiffies, ++ vtpms->disconnect_time + HZ * 10)) { ++ rc = -ENOENT; ++ } else { ++ goto queue_it; ++ } ++ } else { ++ /* ++ * Send all queued packets. ++ */ ++ if (_vtpm_send_queued(chip) == 0) { ++ ++ vtpms->current_request = t; ++ ++ rc = vtpm_vd_send(vtpms->tpm_private, ++ buf, ++ count, ++ t); ++ /* ++ * The generic TPM driver will call ++ * the function to receive the response. ++ */ ++ if (rc < 0) { ++ vtpms->current_request = NULL; ++ goto queue_it; ++ } ++ } else { ++queue_it: ++ if (!transmission_set_req_buffer(t, buf, count)) { ++ transmission_free(t); ++ rc = -ENOMEM; ++ goto exit; ++ } ++ /* ++ * An error occurred. Don't event try ++ * to send the current request. Just ++ * queue it. ++ */ ++ spin_lock_irqsave(&vtpms->req_list_lock, flags); ++ vtpms->flags |= DATAEX_FLAG_QUEUED_ONLY; ++ list_add_tail(&t->next, &vtpms->queued_requests); ++ spin_unlock_irqrestore(&vtpms->req_list_lock, flags); ++ } ++ } ++ ++exit: ++ return rc; ++} ++ ++ ++/* ++ * Send all queued requests. 
++ */ ++static int _vtpm_send_queued(struct tpm_chip *chip) ++{ ++ int rc; ++ int error = 0; ++ long flags; ++ unsigned char buffer[1]; ++ struct vtpm_state *vtpms; ++ vtpms = (struct vtpm_state *)chip_get_private(chip); ++ ++ spin_lock_irqsave(&vtpms->req_list_lock, flags); ++ ++ while (!list_empty(&vtpms->queued_requests)) { ++ /* ++ * Need to dequeue them. ++ * Read the result into a dummy buffer. ++ */ ++ struct transmission *qt = (struct transmission *) ++ vtpms->queued_requests.next; ++ list_del(&qt->next); ++ vtpms->current_request = qt; ++ spin_unlock_irqrestore(&vtpms->req_list_lock, flags); ++ ++ rc = vtpm_vd_send(vtpms->tpm_private, ++ qt->request, ++ qt->request_len, ++ qt); ++ ++ if (rc < 0) { ++ spin_lock_irqsave(&vtpms->req_list_lock, flags); ++ if ((qt = vtpms->current_request) != NULL) { ++ /* ++ * requeue it at the beginning ++ * of the list ++ */ ++ list_add(&qt->next, ++ &vtpms->queued_requests); ++ } ++ vtpms->current_request = NULL; ++ error = 1; ++ break; ++ } ++ /* ++ * After this point qt is not valid anymore! 
++ * It is freed when the front-end is delivering ++ * the data by calling tpm_recv ++ */ ++ /* ++ * Receive response into provided dummy buffer ++ */ ++ rc = vtpm_recv(chip, buffer, sizeof(buffer)); ++ spin_lock_irqsave(&vtpms->req_list_lock, flags); ++ } ++ ++ spin_unlock_irqrestore(&vtpms->req_list_lock, flags); ++ ++ return error; ++} ++ ++static void vtpm_cancel(struct tpm_chip *chip) ++{ ++ unsigned long flags; ++ struct vtpm_state *vtpms = (struct vtpm_state *)chip_get_private(chip); ++ ++ spin_lock_irqsave(&vtpms->resp_list_lock,flags); ++ ++ if (!vtpms->current_response && vtpms->current_request) { ++ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); ++ interruptible_sleep_on(&vtpms->resp_wait_queue); ++ spin_lock_irqsave(&vtpms->resp_list_lock,flags); ++ } ++ ++ if (vtpms->current_response) { ++ struct transmission *t = vtpms->current_response; ++ vtpms->current_response = NULL; ++ transmission_free(t); ++ } ++ ++ spin_unlock_irqrestore(&vtpms->resp_list_lock,flags); ++} ++ ++static u8 vtpm_status(struct tpm_chip *chip) ++{ ++ u8 rc = 0; ++ unsigned long flags; ++ struct vtpm_state *vtpms; ++ ++ vtpms = (struct vtpm_state *)chip_get_private(chip); ++ ++ spin_lock_irqsave(&vtpms->resp_list_lock, flags); ++ /* ++ * Data are available if: ++ * - there's a current response ++ * - the last packet was queued only (this is fake, but necessary to ++ * get the generic TPM layer to call the receive function.) 
++ */ ++ if (vtpms->current_response || ++ 0 != (vtpms->flags & DATAEX_FLAG_QUEUED_ONLY)) { ++ rc = STATUS_DATA_AVAIL; ++ } else if (!vtpms->current_response && !vtpms->current_request) { ++ rc = STATUS_READY; ++ } ++ ++ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); ++ return rc; ++} ++ ++static struct file_operations vtpm_ops = { ++ .owner = THIS_MODULE, ++ .llseek = no_llseek, ++ .open = tpm_open, ++ .read = tpm_read, ++ .write = tpm_write, ++ .release = tpm_release, ++}; ++ ++static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); ++static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); ++static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); ++static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); ++static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); ++static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, ++ NULL); ++static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); ++static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel); ++ ++static struct attribute *vtpm_attrs[] = { ++ &dev_attr_pubek.attr, ++ &dev_attr_pcrs.attr, ++ &dev_attr_enabled.attr, ++ &dev_attr_active.attr, ++ &dev_attr_owned.attr, ++ &dev_attr_temp_deactivated.attr, ++ &dev_attr_caps.attr, ++ &dev_attr_cancel.attr, ++ NULL, ++}; ++ ++static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs }; ++ ++#define TPM_LONG_TIMEOUT (10 * 60 * HZ) ++ ++static struct tpm_vendor_specific tpm_vtpm = { ++ .recv = vtpm_recv, ++ .send = vtpm_send, ++ .cancel = vtpm_cancel, ++ .status = vtpm_status, ++ .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL, ++ .req_complete_val = STATUS_DATA_AVAIL, ++ .req_canceled = STATUS_READY, ++ .attr_group = &vtpm_attr_grp, ++ .miscdev = { ++ .fops = &vtpm_ops, ++ }, ++ .duration = { ++ TPM_LONG_TIMEOUT, ++ TPM_LONG_TIMEOUT, ++ TPM_LONG_TIMEOUT, ++ }, ++}; ++ ++struct tpm_chip *init_vtpm(struct device *dev, ++ struct tpm_private *tp) ++{ ++ long rc; ++ struct tpm_chip *chip; ++ struct vtpm_state 
*vtpms; ++ ++ vtpms = kzalloc(sizeof(struct vtpm_state), GFP_KERNEL); ++ if (!vtpms) ++ return ERR_PTR(-ENOMEM); ++ ++ vtpm_state_init(vtpms); ++ vtpms->tpm_private = tp; ++ ++ chip = tpm_register_hardware(dev, &tpm_vtpm); ++ if (!chip) { ++ rc = -ENODEV; ++ goto err_free_mem; ++ } ++ ++ chip_set_private(chip, vtpms); ++ ++ return chip; ++ ++err_free_mem: ++ kfree(vtpms); ++ ++ return ERR_PTR(rc); ++} ++ ++void cleanup_vtpm(struct device *dev) ++{ ++ struct tpm_chip *chip = dev_get_drvdata(dev); ++ struct vtpm_state *vtpms = (struct vtpm_state*)chip_get_private(chip); ++ tpm_remove_hardware(dev); ++ kfree(vtpms); ++} +Index: linux-2.6.27/drivers/char/tpm/tpm_vtpm.h +=================================================================== +--- /dev/null ++++ linux-2.6.27/drivers/char/tpm/tpm_vtpm.h +@@ -0,0 +1,55 @@ ++#ifndef TPM_VTPM_H ++#define TPM_VTPM_H ++ ++struct tpm_chip; ++struct tpm_private; ++ ++struct vtpm_state { ++ struct transmission *current_request; ++ spinlock_t req_list_lock; ++ wait_queue_head_t req_wait_queue; ++ ++ struct list_head queued_requests; ++ ++ struct transmission *current_response; ++ spinlock_t resp_list_lock; ++ wait_queue_head_t resp_wait_queue; // processes waiting for responses ++ ++ u8 vd_status; ++ u8 flags; ++ ++ unsigned long disconnect_time; ++ ++ /* ++ * The following is a private structure of the underlying ++ * driver. It is passed as parameter in the send function. 
++ */ ++ struct tpm_private *tpm_private; ++}; ++ ++ ++enum vdev_status { ++ TPM_VD_STATUS_DISCONNECTED = 0x0, ++ TPM_VD_STATUS_CONNECTED = 0x1 ++}; ++ ++/* this function is called from tpm_vtpm.c */ ++int vtpm_vd_send(struct tpm_private * tp, ++ const u8 * buf, size_t count, void *ptr); ++ ++/* these functions are offered by tpm_vtpm.c */ ++struct tpm_chip *init_vtpm(struct device *, ++ struct tpm_private *); ++void cleanup_vtpm(struct device *); ++int vtpm_vd_recv(const struct tpm_chip* chip, ++ const unsigned char *buffer, size_t count, void *ptr); ++void vtpm_vd_status(const struct tpm_chip *, u8 status); ++ ++static inline struct tpm_private *tpm_private_from_dev(struct device *dev) ++{ ++ struct tpm_chip *chip = dev_get_drvdata(dev); ++ struct vtpm_state *vtpms = chip_get_private(chip); ++ return vtpms->tpm_private; ++} ++ ++#endif +Index: linux-2.6.27/drivers/char/tpm/tpm_xen.c +=================================================================== +--- /dev/null ++++ linux-2.6.27/drivers/char/tpm/tpm_xen.c +@@ -0,0 +1,722 @@ ++/* ++ * Copyright (c) 2005, IBM Corporation ++ * ++ * Author: Stefan Berger, stefanb@us.ibm.com ++ * Grant table support: Mahadevan Gomathisankaran ++ * ++ * This code has been derived from drivers/xen/netfront/netfront.c ++ * ++ * Copyright (c) 2002-2004, K A Fraser ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation; or, when distributed ++ * separately from the Linux kernel or incorporated into other ++ * software packages, subject to the following license: ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this source file (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, copy, modify, ++ * merge, publish, distribute, sublicense, and/or sell copies of the Software, ++ * and to permit 
persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "tpm.h" ++#include "tpm_vtpm.h" ++ ++#undef DEBUG ++ ++/* local structures */ ++struct tpm_private { ++ struct tpm_chip *chip; ++ ++ tpmif_tx_interface_t *tx; ++ atomic_t refcnt; ++ unsigned int irq; ++ u8 is_connected; ++ u8 is_suspended; ++ ++ spinlock_t tx_lock; ++ ++ struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE]; ++ ++ atomic_t tx_busy; ++ void *tx_remember; ++ ++ domid_t backend_id; ++ wait_queue_head_t wait_q; ++ ++ struct xenbus_device *dev; ++ int ring_ref; ++}; ++ ++struct tx_buffer { ++ unsigned int size; // available space in data ++ unsigned int len; // used space in data ++ unsigned char *data; // pointer to a page ++}; ++ ++ ++/* locally visible variables */ ++static grant_ref_t gref_head; ++static struct tpm_private *my_priv; ++ ++/* local function prototypes */ ++static irqreturn_t tpmif_int(int irq, ++ void *tpm_priv, ++ struct pt_regs *ptregs); ++static void tpmif_rx_action(unsigned long unused); ++static int tpmif_connect(struct xenbus_device *dev, ++ struct tpm_private *tp, ++ domid_t domid); ++static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0); ++static int 
tpmif_allocate_tx_buffers(struct tpm_private *tp); ++static void tpmif_free_tx_buffers(struct tpm_private *tp); ++static void tpmif_set_connected_state(struct tpm_private *tp, ++ u8 newstate); ++static int tpm_xmit(struct tpm_private *tp, ++ const u8 * buf, size_t count, int userbuffer, ++ void *remember); ++static void destroy_tpmring(struct tpm_private *tp); ++void __exit tpmif_exit(void); ++ ++#define DPRINTK(fmt, args...) \ ++ pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args) ++#define IPRINTK(fmt, args...) \ ++ printk(KERN_INFO "xen_tpm_fr: " fmt, ##args) ++#define WPRINTK(fmt, args...) \ ++ printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args) ++ ++#define GRANT_INVALID_REF 0 ++ ++ ++static inline int ++tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len, ++ int isuserbuffer) ++{ ++ int copied = len; ++ ++ if (len > txb->size) ++ copied = txb->size; ++ if (isuserbuffer) { ++ if (copy_from_user(txb->data, src, copied)) ++ return -EFAULT; ++ } else { ++ memcpy(txb->data, src, copied); ++ } ++ txb->len = len; ++ return copied; ++} ++ ++static inline struct tx_buffer *tx_buffer_alloc(void) ++{ ++ struct tx_buffer *txb; ++ ++ txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL); ++ if (!txb) ++ return NULL; ++ ++ txb->len = 0; ++ txb->size = PAGE_SIZE; ++ txb->data = (unsigned char *)__get_free_page(GFP_KERNEL); ++ if (txb->data == NULL) { ++ kfree(txb); ++ txb = NULL; ++ } ++ ++ return txb; ++} ++ ++ ++static inline void tx_buffer_free(struct tx_buffer *txb) ++{ ++ if (txb) { ++ free_page((long)txb->data); ++ kfree(txb); ++ } ++} ++ ++/************************************************************** ++ Utility function for the tpm_private structure ++**************************************************************/ ++static void tpm_private_init(struct tpm_private *tp) ++{ ++ spin_lock_init(&tp->tx_lock); ++ init_waitqueue_head(&tp->wait_q); ++ atomic_set(&tp->refcnt, 1); ++} ++ ++static void tpm_private_put(void) ++{ ++ if 
(!atomic_dec_and_test(&my_priv->refcnt)) ++ return; ++ ++ tpmif_free_tx_buffers(my_priv); ++ kfree(my_priv); ++ my_priv = NULL; ++} ++ ++static struct tpm_private *tpm_private_get(void) ++{ ++ int err; ++ ++ if (my_priv) { ++ atomic_inc(&my_priv->refcnt); ++ return my_priv; ++ } ++ ++ my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL); ++ if (!my_priv) ++ return NULL; ++ ++ tpm_private_init(my_priv); ++ err = tpmif_allocate_tx_buffers(my_priv); ++ if (err < 0) ++ tpm_private_put(); ++ ++ return my_priv; ++} ++ ++/************************************************************** ++ ++ The interface to let the tpm plugin register its callback ++ function and send data to another partition using this module ++ ++**************************************************************/ ++ ++static DEFINE_MUTEX(suspend_lock); ++/* ++ * Send data via this module by calling this function ++ */ ++int vtpm_vd_send(struct tpm_private *tp, ++ const u8 * buf, size_t count, void *ptr) ++{ ++ int sent; ++ ++ mutex_lock(&suspend_lock); ++ sent = tpm_xmit(tp, buf, count, 0, ptr); ++ mutex_unlock(&suspend_lock); ++ ++ return sent; ++} ++ ++/************************************************************** ++ XENBUS support code ++**************************************************************/ ++ ++static int setup_tpmring(struct xenbus_device *dev, ++ struct tpm_private *tp) ++{ ++ tpmif_tx_interface_t *sring; ++ int err; ++ ++ tp->ring_ref = GRANT_INVALID_REF; ++ ++ sring = (void *)__get_free_page(GFP_KERNEL); ++ if (!sring) { ++ xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); ++ return -ENOMEM; ++ } ++ tp->tx = sring; ++ ++ err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx)); ++ if (err < 0) { ++ free_page((unsigned long)sring); ++ tp->tx = NULL; ++ xenbus_dev_fatal(dev, err, "allocating grant reference"); ++ goto fail; ++ } ++ tp->ring_ref = err; ++ ++ err = tpmif_connect(dev, tp, dev->otherend_id); ++ if (err) ++ goto fail; ++ ++ return 0; ++fail: ++ destroy_tpmring(tp); ++ 
return err; ++} ++ ++ ++static void destroy_tpmring(struct tpm_private *tp) ++{ ++ tpmif_set_connected_state(tp, 0); ++ ++ if (tp->ring_ref != GRANT_INVALID_REF) { ++ gnttab_end_foreign_access(tp->ring_ref, (unsigned long)tp->tx); ++ tp->ring_ref = GRANT_INVALID_REF; ++ tp->tx = NULL; ++ } ++ ++ if (tp->irq) ++ unbind_from_irqhandler(tp->irq, tp); ++ ++ tp->irq = 0; ++} ++ ++ ++static int talk_to_backend(struct xenbus_device *dev, ++ struct tpm_private *tp) ++{ ++ const char *message = NULL; ++ int err; ++ struct xenbus_transaction xbt; ++ ++ err = setup_tpmring(dev, tp); ++ if (err) { ++ xenbus_dev_fatal(dev, err, "setting up ring"); ++ goto out; ++ } ++ ++again: ++ err = xenbus_transaction_start(&xbt); ++ if (err) { ++ xenbus_dev_fatal(dev, err, "starting transaction"); ++ goto destroy_tpmring; ++ } ++ ++ err = xenbus_printf(xbt, dev->nodename, ++ "ring-ref","%u", tp->ring_ref); ++ if (err) { ++ message = "writing ring-ref"; ++ goto abort_transaction; ++ } ++ ++ err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", ++ irq_to_evtchn_port(tp->irq)); ++ if (err) { ++ message = "writing event-channel"; ++ goto abort_transaction; ++ } ++ ++ err = xenbus_transaction_end(xbt, 0); ++ if (err == -EAGAIN) ++ goto again; ++ if (err) { ++ xenbus_dev_fatal(dev, err, "completing transaction"); ++ goto destroy_tpmring; ++ } ++ ++ xenbus_switch_state(dev, XenbusStateConnected); ++ ++ return 0; ++ ++abort_transaction: ++ xenbus_transaction_end(xbt, 1); ++ if (message) ++ xenbus_dev_error(dev, err, "%s", message); ++destroy_tpmring: ++ destroy_tpmring(tp); ++out: ++ return err; ++} ++ ++/** ++ * Callback received when the backend's state changes. 
++ */ ++static void backend_changed(struct xenbus_device *dev, ++ enum xenbus_state backend_state) ++{ ++ struct tpm_private *tp = tpm_private_from_dev(&dev->dev); ++ DPRINTK("\n"); ++ ++ switch (backend_state) { ++ case XenbusStateInitialising: ++ case XenbusStateInitWait: ++ case XenbusStateInitialised: ++ case XenbusStateReconfiguring: ++ case XenbusStateReconfigured: ++ case XenbusStateUnknown: ++ break; ++ ++ case XenbusStateConnected: ++ tpmif_set_connected_state(tp, 1); ++ break; ++ ++ case XenbusStateClosing: ++ tpmif_set_connected_state(tp, 0); ++ xenbus_frontend_closed(dev); ++ break; ++ ++ case XenbusStateClosed: ++ tpmif_set_connected_state(tp, 0); ++ if (tp->is_suspended == 0) ++ device_unregister(&dev->dev); ++ xenbus_frontend_closed(dev); ++ break; ++ } ++} ++ ++static int tpmfront_probe(struct xenbus_device *dev, ++ const struct xenbus_device_id *id) ++{ ++ int err; ++ int handle; ++ struct tpm_private *tp = tpm_private_get(); ++ ++ if (!tp) ++ return -ENOMEM; ++ ++ tp->chip = init_vtpm(&dev->dev, tp); ++ if (IS_ERR(tp->chip)) ++ return PTR_ERR(tp->chip); ++ ++ err = xenbus_scanf(XBT_NIL, dev->nodename, ++ "handle", "%i", &handle); ++ if (XENBUS_EXIST_ERR(err)) ++ return err; ++ ++ if (err < 0) { ++ xenbus_dev_fatal(dev,err,"reading virtual-device"); ++ return err; ++ } ++ ++ tp->dev = dev; ++ ++ err = talk_to_backend(dev, tp); ++ if (err) { ++ tpm_private_put(); ++ return err; ++ } ++ ++ return 0; ++} ++ ++ ++static int tpmfront_remove(struct xenbus_device *dev) ++{ ++ struct tpm_private *tp = tpm_private_from_dev(&dev->dev); ++ destroy_tpmring(tp); ++ cleanup_vtpm(&dev->dev); ++ return 0; ++} ++ ++static int tpmfront_suspend(struct xenbus_device *dev) ++{ ++ struct tpm_private *tp = tpm_private_from_dev(&dev->dev); ++ u32 ctr; ++ ++ /* Take the lock, preventing any application from sending. 
*/ ++ mutex_lock(&suspend_lock); ++ tp->is_suspended = 1; ++ ++ for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) { ++ if ((ctr % 10) == 0) ++ printk("TPM-FE [INFO]: Waiting for outstanding " ++ "request.\n"); ++ /* Wait for a request to be responded to. */ ++ interruptible_sleep_on_timeout(&tp->wait_q, 100); ++ } ++ ++ return 0; ++} ++ ++static int tpmfront_suspend_finish(struct tpm_private *tp) ++{ ++ tp->is_suspended = 0; ++ /* Allow applications to send again. */ ++ mutex_unlock(&suspend_lock); ++ return 0; ++} ++ ++static int tpmfront_suspend_cancel(struct xenbus_device *dev) ++{ ++ struct tpm_private *tp = tpm_private_from_dev(&dev->dev); ++ return tpmfront_suspend_finish(tp); ++} ++ ++static int tpmfront_resume(struct xenbus_device *dev) ++{ ++ struct tpm_private *tp = tpm_private_from_dev(&dev->dev); ++ destroy_tpmring(tp); ++ return talk_to_backend(dev, tp); ++} ++ ++static int tpmif_connect(struct xenbus_device *dev, ++ struct tpm_private *tp, ++ domid_t domid) ++{ ++ int err; ++ ++ tp->backend_id = domid; ++ ++ err = bind_listening_port_to_irqhandler( ++ domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp); ++ if (err <= 0) { ++ WPRINTK("bind_listening_port_to_irqhandler failed " ++ "(err=%d)\n", err); ++ return err; ++ } ++ tp->irq = err; ++ ++ return 0; ++} ++ ++static struct xenbus_device_id tpmfront_ids[] = { ++ { "vtpm" }, ++ { "" } ++}; ++ ++static struct xenbus_driver tpmfront = { ++ .name = "vtpm", ++ .owner = THIS_MODULE, ++ .ids = tpmfront_ids, ++ .probe = tpmfront_probe, ++ .remove = tpmfront_remove, ++ .resume = tpmfront_resume, ++ .otherend_changed = backend_changed, ++ .suspend = tpmfront_suspend, ++ .suspend_cancel = tpmfront_suspend_cancel, ++}; ++ ++static void __init init_tpm_xenbus(void) ++{ ++ xenbus_register_frontend(&tpmfront); ++} ++ ++static int tpmif_allocate_tx_buffers(struct tpm_private *tp) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < TPMIF_TX_RING_SIZE; i++) { ++ tp->tx_buffers[i] = tx_buffer_alloc(); ++ if (!tp->tx_buffers[i]) { 
++ tpmif_free_tx_buffers(tp); ++ return -ENOMEM; ++ } ++ } ++ return 0; ++} ++ ++static void tpmif_free_tx_buffers(struct tpm_private *tp) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < TPMIF_TX_RING_SIZE; i++) ++ tx_buffer_free(tp->tx_buffers[i]); ++} ++ ++static void tpmif_rx_action(unsigned long priv) ++{ ++ struct tpm_private *tp = (struct tpm_private *)priv; ++ int i = 0; ++ unsigned int received; ++ unsigned int offset = 0; ++ u8 *buffer; ++ tpmif_tx_request_t *tx = &tp->tx->ring[i].req; ++ ++ atomic_set(&tp->tx_busy, 0); ++ wake_up_interruptible(&tp->wait_q); ++ ++ received = tx->size; ++ ++ buffer = kmalloc(received, GFP_ATOMIC); ++ if (!buffer) ++ return; ++ ++ for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) { ++ struct tx_buffer *txb = tp->tx_buffers[i]; ++ tpmif_tx_request_t *tx; ++ unsigned int tocopy; ++ ++ tx = &tp->tx->ring[i].req; ++ tocopy = tx->size; ++ if (tocopy > PAGE_SIZE) ++ tocopy = PAGE_SIZE; ++ ++ memcpy(&buffer[offset], txb->data, tocopy); ++ ++ gnttab_release_grant_reference(&gref_head, tx->ref); ++ ++ offset += tocopy; ++ } ++ ++ vtpm_vd_recv(tp->chip, buffer, received, tp->tx_remember); ++ kfree(buffer); ++} ++ ++ ++static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs) ++{ ++ struct tpm_private *tp = tpm_priv; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&tp->tx_lock, flags); ++ tpmif_rx_tasklet.data = (unsigned long)tp; ++ tasklet_schedule(&tpmif_rx_tasklet); ++ spin_unlock_irqrestore(&tp->tx_lock, flags); ++ ++ return IRQ_HANDLED; ++} ++ ++ ++static int tpm_xmit(struct tpm_private *tp, ++ const u8 * buf, size_t count, int isuserbuffer, ++ void *remember) ++{ ++ tpmif_tx_request_t *tx; ++ TPMIF_RING_IDX i; ++ unsigned int offset = 0; ++ ++ spin_lock_irq(&tp->tx_lock); ++ ++ if (unlikely(atomic_read(&tp->tx_busy))) { ++ printk("tpm_xmit: There's an outstanding request/response " ++ "on the way!\n"); ++ spin_unlock_irq(&tp->tx_lock); ++ return -EBUSY; ++ } ++ ++ if (tp->is_connected != 1) { ++ 
spin_unlock_irq(&tp->tx_lock); ++ return -EIO; ++ } ++ ++ for (i = 0; count > 0 && i < TPMIF_TX_RING_SIZE; i++) { ++ struct tx_buffer *txb = tp->tx_buffers[i]; ++ int copied; ++ ++ if (!txb) { ++ DPRINTK("txb (i=%d) is NULL. buffers initilized?\n" ++ "Not transmitting anything!\n", i); ++ spin_unlock_irq(&tp->tx_lock); ++ return -EFAULT; ++ } ++ ++ copied = tx_buffer_copy(txb, &buf[offset], count, ++ isuserbuffer); ++ if (copied < 0) { ++ /* An error occurred */ ++ spin_unlock_irq(&tp->tx_lock); ++ return copied; ++ } ++ count -= copied; ++ offset += copied; ++ ++ tx = &tp->tx->ring[i].req; ++ tx->addr = virt_to_machine(txb->data); ++ tx->size = txb->len; ++ tx->unused = 0; ++ ++ DPRINTK("First 4 characters sent by TPM-FE are " ++ "0x%02x 0x%02x 0x%02x 0x%02x\n", ++ txb->data[0],txb->data[1],txb->data[2],txb->data[3]); ++ ++ /* Get the granttable reference for this page. */ ++ tx->ref = gnttab_claim_grant_reference(&gref_head); ++ if (tx->ref == -ENOSPC) { ++ spin_unlock_irq(&tp->tx_lock); ++ DPRINTK("Grant table claim reference failed in " ++ "func:%s line:%d file:%s\n", ++ __FUNCTION__, __LINE__, __FILE__); ++ return -ENOSPC; ++ } ++ gnttab_grant_foreign_access_ref(tx->ref, ++ tp->backend_id, ++ virt_to_mfn(txb->data), ++ 0 /*RW*/); ++ wmb(); ++ } ++ ++ atomic_set(&tp->tx_busy, 1); ++ tp->tx_remember = remember; ++ ++ mb(); ++ ++ notify_remote_via_irq(tp->irq); ++ ++ spin_unlock_irq(&tp->tx_lock); ++ return offset; ++} ++ ++ ++static void tpmif_notify_upperlayer(struct tpm_private *tp) ++{ ++ /* Notify upper layer about the state of the connection to the BE. */ ++ vtpm_vd_status(tp->chip, (tp->is_connected ++ ? TPM_VD_STATUS_CONNECTED ++ : TPM_VD_STATUS_DISCONNECTED)); ++} ++ ++ ++static void tpmif_set_connected_state(struct tpm_private *tp, u8 is_connected) ++{ ++ /* ++ * Don't notify upper layer if we are in suspend mode and ++ * should disconnect - assumption is that we will resume ++ * The mutex keeps apps from sending. 
++ */ ++ if (is_connected == 0 && tp->is_suspended == 1) ++ return; ++ ++ /* ++ * Unlock the mutex if we are connected again ++ * after being suspended - now resuming. ++ * This also removes the suspend state. ++ */ ++ if (is_connected == 1 && tp->is_suspended == 1) ++ tpmfront_suspend_finish(tp); ++ ++ if (is_connected != tp->is_connected) { ++ tp->is_connected = is_connected; ++ tpmif_notify_upperlayer(tp); ++ } ++} ++ ++ ++ ++/* ================================================================= ++ * Initialization function. ++ * ================================================================= ++ */ ++ ++ ++static int __init tpmif_init(void) ++{ ++ struct tpm_private *tp; ++ ++ if (is_initial_xendomain()) ++ return -EPERM; ++ ++ tp = tpm_private_get(); ++ if (!tp) ++ return -ENOMEM; ++ ++ IPRINTK("Initialising the vTPM driver.\n"); ++ if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE, ++ &gref_head) < 0) { ++ tpm_private_put(); ++ return -EFAULT; ++ } ++ ++ init_tpm_xenbus(); ++ return 0; ++} ++ ++ ++module_init(tpmif_init); ++ ++MODULE_LICENSE("Dual BSD/GPL"); +Index: linux-2.6.27/drivers/ide/ide-lib.c +=================================================================== +--- linux-2.6.27.orig/drivers/ide/ide-lib.c ++++ linux-2.6.27/drivers/ide/ide-lib.c +@@ -177,12 +177,12 @@ void ide_toggle_bounce(ide_drive_t *driv + { + u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ + +- if (!PCI_DMA_BUS_IS_PHYS) { +- addr = BLK_BOUNCE_ANY; +- } else if (on && drive->media == ide_disk) { ++ if (on && drive->media == ide_disk) { + struct device *dev = drive->hwif->dev; + +- if (dev && dev->dma_mask) ++ if (!PCI_DMA_BUS_IS_PHYS) ++ addr = BLK_BOUNCE_ANY; ++ else if (dev && dev->dma_mask) + addr = *dev->dma_mask; + } + +Index: linux-2.6.27/drivers/oprofile/buffer_sync.c +=================================================================== +--- linux-2.6.27.orig/drivers/oprofile/buffer_sync.c ++++ linux-2.6.27/drivers/oprofile/buffer_sync.c +@@ -6,6 +6,10 @@ + * + * @author John 
Levon + * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * + * This is the core of the buffer management. Each + * CPU buffer is processed and entered into the + * global event buffer. Such processing is necessary +@@ -40,6 +44,7 @@ static cpumask_t marked_cpus = CPU_MASK_ + static DEFINE_SPINLOCK(task_mortuary); + static void process_task_mortuary(void); + ++static int cpu_current_domain[NR_CPUS]; + + /* Take ownership of the task struct and place it on the + * list for processing. Only after two full buffer syncs +@@ -148,6 +153,11 @@ static void end_sync(void) + int sync_start(void) + { + int err; ++ int i; ++ ++ for (i = 0; i < NR_CPUS; i++) { ++ cpu_current_domain[i] = COORDINATOR_DOMAIN; ++ } + + start_cpu_work(); + +@@ -274,15 +284,31 @@ static void add_cpu_switch(int i) + last_cookie = INVALID_COOKIE; + } + +-static void add_kernel_ctx_switch(unsigned int in_kernel) ++static void add_cpu_mode_switch(unsigned int cpu_mode) + { + add_event_entry(ESCAPE_CODE); +- if (in_kernel) +- add_event_entry(KERNEL_ENTER_SWITCH_CODE); +- else +- add_event_entry(KERNEL_EXIT_SWITCH_CODE); ++ switch (cpu_mode) { ++ case CPU_MODE_USER: ++ add_event_entry(USER_ENTER_SWITCH_CODE); ++ break; ++ case CPU_MODE_KERNEL: ++ add_event_entry(KERNEL_ENTER_SWITCH_CODE); ++ break; ++ case CPU_MODE_XEN: ++ add_event_entry(XEN_ENTER_SWITCH_CODE); ++ break; ++ default: ++ break; ++ } + } +- ++ ++static void add_domain_switch(unsigned long domain_id) ++{ ++ add_event_entry(ESCAPE_CODE); ++ add_event_entry(DOMAIN_SWITCH_CODE); ++ add_event_entry(domain_id); ++} ++ + static void + add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) + { +@@ -347,9 +373,9 @@ static int add_us_sample(struct mm_struc + * for later lookup from userspace. 
+ */ + static int +-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) ++add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode) + { +- if (in_kernel) { ++ if (cpu_mode >= CPU_MODE_KERNEL) { + add_sample_entry(s->eip, s->event); + return 1; + } else if (mm) { +@@ -495,15 +521,21 @@ void sync_buffer(int cpu) + struct mm_struct *mm = NULL; + struct task_struct * new; + unsigned long cookie = 0; +- int in_kernel = 1; ++ int cpu_mode = 1; + unsigned int i; + sync_buffer_state state = sb_buffer_start; + unsigned long available; ++ int domain_switch = 0; + + mutex_lock(&buffer_mutex); + + add_cpu_switch(cpu); + ++ /* We need to assign the first samples in this CPU buffer to the ++ same domain that we were processing at the last sync_buffer */ ++ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { ++ add_domain_switch(cpu_current_domain[cpu]); ++ } + /* Remember, only we can modify tail_pos */ + + available = get_slots(cpu_buf); +@@ -511,16 +543,18 @@ void sync_buffer(int cpu) + for (i = 0; i < available; ++i) { + struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; + +- if (is_code(s->eip)) { +- if (s->event <= CPU_IS_KERNEL) { +- /* kernel/userspace switch */ +- in_kernel = s->event; ++ if (is_code(s->eip) && !domain_switch) { ++ if (s->event <= CPU_MODE_XEN) { ++ /* xen/kernel/userspace switch */ ++ cpu_mode = s->event; + if (state == sb_buffer_start) + state = sb_sample_start; +- add_kernel_ctx_switch(s->event); ++ add_cpu_mode_switch(s->event); + } else if (s->event == CPU_TRACE_BEGIN) { + state = sb_bt_start; + add_trace_begin(); ++ } else if (s->event == CPU_DOMAIN_SWITCH) { ++ domain_switch = 1; + } else { + struct mm_struct * oldmm = mm; + +@@ -534,11 +568,21 @@ void sync_buffer(int cpu) + add_user_ctx_switch(new, cookie); + } + } else { +- if (state >= sb_bt_start && +- !add_sample(mm, s, in_kernel)) { +- if (state == sb_bt_start) { +- state = sb_bt_ignore; +- atomic_inc(&oprofile_stats.bt_lost_no_mapping); ++ if 
(domain_switch) { ++ cpu_current_domain[cpu] = s->eip; ++ add_domain_switch(s->eip); ++ domain_switch = 0; ++ } else { ++ if (cpu_current_domain[cpu] != ++ COORDINATOR_DOMAIN) { ++ add_sample_entry(s->eip, s->event); ++ } ++ else if (state >= sb_bt_start && ++ !add_sample(mm, s, cpu_mode)) { ++ if (state == sb_bt_start) { ++ state = sb_bt_ignore; ++ atomic_inc(&oprofile_stats.bt_lost_no_mapping); ++ } + } + } + } +@@ -547,6 +591,11 @@ void sync_buffer(int cpu) + } + release_mm(mm); + ++ /* We reset domain to COORDINATOR at each CPU switch */ ++ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { ++ add_domain_switch(COORDINATOR_DOMAIN); ++ } ++ + mark_done(cpu); + + mutex_unlock(&buffer_mutex); +Index: linux-2.6.27/drivers/oprofile/cpu_buffer.c +=================================================================== +--- linux-2.6.27.orig/drivers/oprofile/cpu_buffer.c ++++ linux-2.6.27/drivers/oprofile/cpu_buffer.c +@@ -6,6 +6,10 @@ + * + * @author John Levon + * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * + * Each CPU has a local buffer that stores PC value/event + * pairs. We also log context switches when we notice them. 
+ * Eventually each CPU's buffer is processed into the global +@@ -34,6 +38,8 @@ static void wq_sync_buffer(struct work_s + #define DEFAULT_TIMER_EXPIRE (HZ / 10) + static int work_enabled; + ++static int32_t current_domain = COORDINATOR_DOMAIN; ++ + void free_cpu_buffers(void) + { + int i; +@@ -72,7 +78,7 @@ int alloc_cpu_buffers(void) + goto fail; + + b->last_task = NULL; +- b->last_is_kernel = -1; ++ b->last_cpu_mode = -1; + b->tracing = 0; + b->buffer_size = buffer_size; + b->tail_pos = 0; +@@ -130,7 +136,7 @@ void cpu_buffer_reset(struct oprofile_cp + * collected will populate the buffer with proper + * values to initialize the buffer + */ +- cpu_buf->last_is_kernel = -1; ++ cpu_buf->last_cpu_mode = -1; + cpu_buf->last_task = NULL; + } + +@@ -180,13 +186,13 @@ add_code(struct oprofile_cpu_buffer * bu + * because of the head/tail separation of the writer and reader + * of the CPU buffer. + * +- * is_kernel is needed because on some architectures you cannot ++ * cpu_mode is needed because on some architectures you cannot + * tell if you are in kernel or user space simply by looking at +- * pc. We tag this in the buffer by generating kernel enter/exit +- * events whenever is_kernel changes ++ * pc. 
We tag this in the buffer by generating kernel/user (and xen) ++ * enter events whenever cpu_mode changes + */ + static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, +- int is_kernel, unsigned long event) ++ int cpu_mode, unsigned long event) + { + struct task_struct * task; + +@@ -202,18 +208,18 @@ static int log_sample(struct oprofile_cp + return 0; + } + +- is_kernel = !!is_kernel; +- + task = current; + + /* notice a switch from user->kernel or vice versa */ +- if (cpu_buf->last_is_kernel != is_kernel) { +- cpu_buf->last_is_kernel = is_kernel; +- add_code(cpu_buf, is_kernel); ++ if (cpu_buf->last_cpu_mode != cpu_mode) { ++ cpu_buf->last_cpu_mode = cpu_mode; ++ add_code(cpu_buf, cpu_mode); + } +- ++ + /* notice a task switch */ +- if (cpu_buf->last_task != task) { ++ /* if not processing other domain samples */ ++ if ((cpu_buf->last_task != task) && ++ (current_domain == COORDINATOR_DOMAIN)) { + cpu_buf->last_task = task; + add_code(cpu_buf, (unsigned long)task); + } +@@ -297,6 +303,25 @@ void oprofile_add_trace(unsigned long pc + add_sample(cpu_buf, pc, 0); + } + ++int oprofile_add_domain_switch(int32_t domain_id) ++{ ++ struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()]; ++ ++ /* should have space for switching into and out of domain ++ (2 slots each) plus one sample and one cpu mode switch */ ++ if (((nr_available_slots(cpu_buf) < 6) && ++ (domain_id != COORDINATOR_DOMAIN)) || ++ (nr_available_slots(cpu_buf) < 2)) ++ return 0; ++ ++ add_code(cpu_buf, CPU_DOMAIN_SWITCH); ++ add_sample(cpu_buf, domain_id, 0); ++ ++ current_domain = domain_id; ++ ++ return 1; ++} ++ + /* + * This serves to avoid cpu buffer overflow, and makes sure + * the task mortuary progresses +Index: linux-2.6.27/drivers/oprofile/cpu_buffer.h +=================================================================== +--- linux-2.6.27.orig/drivers/oprofile/cpu_buffer.h ++++ linux-2.6.27/drivers/oprofile/cpu_buffer.h +@@ -37,7 +37,7 @@ struct 
oprofile_cpu_buffer { + volatile unsigned long tail_pos; + unsigned long buffer_size; + struct task_struct * last_task; +- int last_is_kernel; ++ int last_cpu_mode; + int tracing; + struct op_sample * buffer; + unsigned long sample_received; +@@ -53,7 +53,10 @@ DECLARE_PER_CPU(struct oprofile_cpu_buff + void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); + + /* transient events for the CPU buffer -> event buffer */ +-#define CPU_IS_KERNEL 1 +-#define CPU_TRACE_BEGIN 2 ++#define CPU_MODE_USER 0 ++#define CPU_MODE_KERNEL 1 ++#define CPU_MODE_XEN 2 ++#define CPU_TRACE_BEGIN 3 ++#define CPU_DOMAIN_SWITCH 4 + + #endif /* OPROFILE_CPU_BUFFER_H */ +Index: linux-2.6.27/drivers/oprofile/event_buffer.h +=================================================================== +--- linux-2.6.27.orig/drivers/oprofile/event_buffer.h ++++ linux-2.6.27/drivers/oprofile/event_buffer.h +@@ -30,6 +30,9 @@ void wake_up_buffer_waiter(void); + #define INVALID_COOKIE ~0UL + #define NO_COOKIE 0UL + ++/* Constant used to refer to coordinator domain (Xen) */ ++#define COORDINATOR_DOMAIN -1 ++ + extern const struct file_operations event_buffer_fops; + + /* mutex between sync_cpu_buffers() and the +Index: linux-2.6.27/drivers/oprofile/oprof.c +=================================================================== +--- linux-2.6.27.orig/drivers/oprofile/oprof.c ++++ linux-2.6.27/drivers/oprofile/oprof.c +@@ -5,6 +5,10 @@ + * @remark Read the file COPYING + * + * @author John Levon ++ * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. 
+ */ + + #include +@@ -33,6 +37,32 @@ static DEFINE_MUTEX(start_mutex); + */ + static int timer = 0; + ++int oprofile_set_active(int active_domains[], unsigned int adomains) ++{ ++ int err; ++ ++ if (!oprofile_ops.set_active) ++ return -EINVAL; ++ ++ mutex_lock(&start_mutex); ++ err = oprofile_ops.set_active(active_domains, adomains); ++ mutex_unlock(&start_mutex); ++ return err; ++} ++ ++int oprofile_set_passive(int passive_domains[], unsigned int pdomains) ++{ ++ int err; ++ ++ if (!oprofile_ops.set_passive) ++ return -EINVAL; ++ ++ mutex_lock(&start_mutex); ++ err = oprofile_ops.set_passive(passive_domains, pdomains); ++ mutex_unlock(&start_mutex); ++ return err; ++} ++ + int oprofile_setup(void) + { + int err; +Index: linux-2.6.27/drivers/oprofile/oprof.h +=================================================================== +--- linux-2.6.27.orig/drivers/oprofile/oprof.h ++++ linux-2.6.27/drivers/oprofile/oprof.h +@@ -35,5 +35,8 @@ void oprofile_create_files(struct super_ + void oprofile_timer_init(struct oprofile_operations * ops); + + int oprofile_set_backtrace(unsigned long depth); ++ ++int oprofile_set_active(int active_domains[], unsigned int adomains); ++int oprofile_set_passive(int passive_domains[], unsigned int pdomains); + + #endif /* OPROF_H */ +Index: linux-2.6.27/drivers/oprofile/oprofile_files.c +=================================================================== +--- linux-2.6.27.orig/drivers/oprofile/oprofile_files.c ++++ linux-2.6.27/drivers/oprofile/oprofile_files.c +@@ -5,15 +5,21 @@ + * @remark Read the file COPYING + * + * @author John Levon ++ * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. 
+ */ + + #include + #include ++#include ++#include + + #include "event_buffer.h" + #include "oprofile_stats.h" + #include "oprof.h" +- ++ + unsigned long fs_buffer_size = 131072; + unsigned long fs_cpu_buffer_size = 8192; + unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ +@@ -117,11 +123,202 @@ static ssize_t dump_write(struct file * + static const struct file_operations dump_fops = { + .write = dump_write, + }; +- ++ ++#define TMPBUFSIZE 512 ++ ++static unsigned int adomains = 0; ++static int active_domains[MAX_OPROF_DOMAINS + 1]; ++static DEFINE_MUTEX(adom_mutex); ++ ++static ssize_t adomain_write(struct file * file, char const __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char *tmpbuf; ++ char *startp, *endp; ++ int i; ++ unsigned long val; ++ ssize_t retval = count; ++ ++ if (*offset) ++ return -EINVAL; ++ if (count > TMPBUFSIZE - 1) ++ return -EINVAL; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ if (copy_from_user(tmpbuf, buf, count)) { ++ kfree(tmpbuf); ++ return -EFAULT; ++ } ++ tmpbuf[count] = 0; ++ ++ mutex_lock(&adom_mutex); ++ ++ startp = tmpbuf; ++ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ ++ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { ++ val = simple_strtoul(startp, &endp, 0); ++ if (endp == startp) ++ break; ++ while (ispunct(*endp) || isspace(*endp)) ++ endp++; ++ active_domains[i] = val; ++ if (active_domains[i] != val) ++ /* Overflow, force error below */ ++ i = MAX_OPROF_DOMAINS + 1; ++ startp = endp; ++ } ++ /* Force error on trailing junk */ ++ adomains = *startp ? 
MAX_OPROF_DOMAINS + 1 : i; ++ ++ kfree(tmpbuf); ++ ++ if (adomains > MAX_OPROF_DOMAINS ++ || oprofile_set_active(active_domains, adomains)) { ++ adomains = 0; ++ retval = -EINVAL; ++ } ++ ++ mutex_unlock(&adom_mutex); ++ return retval; ++} ++ ++static ssize_t adomain_read(struct file * file, char __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char * tmpbuf; ++ size_t len; ++ int i; ++ ssize_t retval; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ mutex_lock(&adom_mutex); ++ ++ len = 0; ++ for (i = 0; i < adomains; i++) ++ len += snprintf(tmpbuf + len, ++ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0, ++ "%u ", active_domains[i]); ++ WARN_ON(len > TMPBUFSIZE); ++ if (len != 0 && len <= TMPBUFSIZE) ++ tmpbuf[len-1] = '\n'; ++ ++ mutex_unlock(&adom_mutex); ++ ++ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); ++ ++ kfree(tmpbuf); ++ return retval; ++} ++ ++ ++static struct file_operations active_domain_ops = { ++ .read = adomain_read, ++ .write = adomain_write, ++}; ++ ++static unsigned int pdomains = 0; ++static int passive_domains[MAX_OPROF_DOMAINS]; ++static DEFINE_MUTEX(pdom_mutex); ++ ++static ssize_t pdomain_write(struct file * file, char const __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char *tmpbuf; ++ char *startp, *endp; ++ int i; ++ unsigned long val; ++ ssize_t retval = count; ++ ++ if (*offset) ++ return -EINVAL; ++ if (count > TMPBUFSIZE - 1) ++ return -EINVAL; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ if (copy_from_user(tmpbuf, buf, count)) { ++ kfree(tmpbuf); ++ return -EFAULT; ++ } ++ tmpbuf[count] = 0; ++ ++ mutex_lock(&pdom_mutex); ++ ++ startp = tmpbuf; ++ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ ++ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { ++ val = simple_strtoul(startp, &endp, 0); ++ if (endp == startp) ++ break; ++ while (ispunct(*endp) || isspace(*endp)) ++ endp++; ++ passive_domains[i] = val; ++ if 
(passive_domains[i] != val) ++ /* Overflow, force error below */ ++ i = MAX_OPROF_DOMAINS + 1; ++ startp = endp; ++ } ++ /* Force error on trailing junk */ ++ pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; ++ ++ kfree(tmpbuf); ++ ++ if (pdomains > MAX_OPROF_DOMAINS ++ || oprofile_set_passive(passive_domains, pdomains)) { ++ pdomains = 0; ++ retval = -EINVAL; ++ } ++ ++ mutex_unlock(&pdom_mutex); ++ return retval; ++} ++ ++static ssize_t pdomain_read(struct file * file, char __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char * tmpbuf; ++ size_t len; ++ int i; ++ ssize_t retval; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ mutex_lock(&pdom_mutex); ++ ++ len = 0; ++ for (i = 0; i < pdomains; i++) ++ len += snprintf(tmpbuf + len, ++ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0, ++ "%u ", passive_domains[i]); ++ WARN_ON(len > TMPBUFSIZE); ++ if (len != 0 && len <= TMPBUFSIZE) ++ tmpbuf[len-1] = '\n'; ++ ++ mutex_unlock(&pdom_mutex); ++ ++ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); ++ ++ kfree(tmpbuf); ++ return retval; ++} ++ ++static struct file_operations passive_domain_ops = { ++ .read = pdomain_read, ++ .write = pdomain_write, ++}; ++ + void oprofile_create_files(struct super_block * sb, struct dentry * root) + { + oprofilefs_create_file(sb, root, "enable", &enable_fops); + oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); ++ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); ++ oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops); + oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); + oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); + oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); +Index: linux-2.6.27/fs/aio.c +=================================================================== +--- linux-2.6.27.orig/fs/aio.c ++++ linux-2.6.27/fs/aio.c +@@ -36,6 +36,11 @@ + #include + #include + ++#ifdef 
CONFIG_EPOLL ++#include ++#include ++#endif ++ + #if DEBUG > 1 + #define dprintk printk + #else +@@ -1026,6 +1031,11 @@ put_rq: + if (waitqueue_active(&ctx->wait)) + wake_up(&ctx->wait); + ++#ifdef CONFIG_EPOLL ++ if (ctx->file && waitqueue_active(&ctx->poll_wait)) ++ wake_up(&ctx->poll_wait); ++#endif ++ + spin_unlock_irqrestore(&ctx->ctx_lock, flags); + return ret; + } +@@ -1033,6 +1043,8 @@ put_rq: + /* aio_read_evt + * Pull an event off of the ioctx's event ring. Returns the number of + * events fetched (0 or 1 ;-) ++ * If ent parameter is 0, just returns the number of events that would ++ * be fetched. + * FIXME: make this use cmpxchg. + * TODO: make the ringbuffer user mmap()able (requires FIXME). + */ +@@ -1055,13 +1067,18 @@ static int aio_read_evt(struct kioctx *i + + head = ring->head % info->nr; + if (head != ring->tail) { +- struct io_event *evp = aio_ring_event(info, head, KM_USER1); +- *ent = *evp; +- head = (head + 1) % info->nr; +- smp_mb(); /* finish reading the event before updatng the head */ +- ring->head = head; +- ret = 1; +- put_aio_ring_event(evp, KM_USER1); ++ if (ent) { /* event requested */ ++ struct io_event *evp = ++ aio_ring_event(info, head, KM_USER1); ++ *ent = *evp; ++ head = (head + 1) % info->nr; ++ /* finish reading the event before updatng the head */ ++ smp_mb(); ++ ring->head = head; ++ ret = 1; ++ put_aio_ring_event(evp, KM_USER1); ++ } else /* only need to know availability */ ++ ret = 1; + } + spin_unlock(&info->ring_lock); + +@@ -1251,6 +1268,13 @@ static void io_destroy(struct kioctx *io + + aio_cancel_all(ioctx); + wait_for_all_aios(ioctx); ++#ifdef CONFIG_EPOLL ++ /* forget the poll file, but it's up to the user to close it */ ++ if (ioctx->file) { ++ ioctx->file->private_data = 0; ++ ioctx->file = 0; ++ } ++#endif + + /* + * Wake up any waiters. 
The setting of ctx->dead must be seen +@@ -1261,6 +1285,67 @@ static void io_destroy(struct kioctx *io + put_ioctx(ioctx); /* once for the lookup */ + } + ++#ifdef CONFIG_EPOLL ++ ++static int aio_queue_fd_close(struct inode *inode, struct file *file) ++{ ++ struct kioctx *ioctx = file->private_data; ++ if (ioctx) { ++ file->private_data = 0; ++ spin_lock_irq(&ioctx->ctx_lock); ++ ioctx->file = 0; ++ spin_unlock_irq(&ioctx->ctx_lock); ++ } ++ return 0; ++} ++ ++static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait) ++{ unsigned int pollflags = 0; ++ struct kioctx *ioctx = file->private_data; ++ ++ if (ioctx) { ++ ++ spin_lock_irq(&ioctx->ctx_lock); ++ /* Insert inside our poll wait queue */ ++ poll_wait(file, &ioctx->poll_wait, wait); ++ ++ /* Check our condition */ ++ if (aio_read_evt(ioctx, 0)) ++ pollflags = POLLIN | POLLRDNORM; ++ spin_unlock_irq(&ioctx->ctx_lock); ++ } ++ ++ return pollflags; ++} ++ ++static const struct file_operations aioq_fops = { ++ .release = aio_queue_fd_close, ++ .poll = aio_queue_fd_poll ++}; ++ ++/* make_aio_fd: ++ * Create a file descriptor that can be used to poll the event queue. ++ * Based and piggybacked on the excellent epoll code. ++ */ ++ ++static int make_aio_fd(struct kioctx *ioctx) ++{ ++ int error, fd; ++ struct inode *inode; ++ struct file *file; ++ ++ error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops); ++ if (error) ++ return error; ++ ++ /* associate the file with the IO context */ ++ file->private_data = ioctx; ++ ioctx->file = file; ++ init_waitqueue_head(&ioctx->poll_wait); ++ return fd; ++} ++#endif ++ + /* sys_io_setup: + * Create an aio_context capable of receiving at least nr_events. + * ctxp must not point to an aio_context that already exists, and +@@ -1273,18 +1358,30 @@ static void io_destroy(struct kioctx *io + * resources are available. May fail with -EFAULT if an invalid + * pointer is passed for ctxp. Will fail with -ENOSYS if not + * implemented. 
++ * ++ * To request a selectable fd, the user context has to be initialized ++ * to 1, instead of 0, and the return value is the fd. ++ * This keeps the system call compatible, since a non-zero value ++ * was not allowed so far. + */ + SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) + { + struct kioctx *ioctx = NULL; + unsigned long ctx; + long ret; ++ int make_fd = 0; + + ret = get_user(ctx, ctxp); + if (unlikely(ret)) + goto out; + + ret = -EINVAL; ++#ifdef CONFIG_EPOLL ++ if (ctx == 1) { ++ make_fd = 1; ++ ctx = 0; ++ } ++#endif + if (unlikely(ctx || nr_events == 0)) { + pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n", + ctx, nr_events); +@@ -1295,8 +1392,12 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_e + ret = PTR_ERR(ioctx); + if (!IS_ERR(ioctx)) { + ret = put_user(ioctx->user_id, ctxp); +- if (!ret) +- return 0; ++#ifdef CONFIG_EPOLL ++ if (make_fd && ret >= 0) ++ ret = make_aio_fd(ioctx); ++#endif ++ if (ret >= 0) ++ return ret; + + get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ + io_destroy(ioctx); +Index: linux-2.6.27/fs/compat_ioctl.c +=================================================================== +--- linux-2.6.27.orig/fs/compat_ioctl.c ++++ linux-2.6.27/fs/compat_ioctl.c +@@ -114,6 +114,13 @@ + #include + #endif + ++#ifdef CONFIG_XEN ++#include ++#include ++#include ++#include ++#endif ++ + static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, + unsigned long arg, struct file *f) + { +@@ -2736,6 +2743,18 @@ IGNORE_IOCTL(FBIOGETCMAP32) + IGNORE_IOCTL(FBIOSCURSOR32) + IGNORE_IOCTL(FBIOGCURSOR32) + #endif ++ ++#ifdef CONFIG_XEN ++HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_32, privcmd_ioctl_32) ++HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_32, privcmd_ioctl_32) ++COMPATIBLE_IOCTL(IOCTL_PRIVCMD_HYPERCALL) ++COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_VIRQ) ++COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_INTERDOMAIN) ++COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_UNBOUND_PORT) ++COMPATIBLE_IOCTL(IOCTL_EVTCHN_UNBIND) 
++COMPATIBLE_IOCTL(IOCTL_EVTCHN_NOTIFY) ++COMPATIBLE_IOCTL(IOCTL_EVTCHN_RESET) ++#endif + }; + + #define IOCTL_HASHSIZE 256 +Index: linux-2.6.27/include/acpi/processor.h +=================================================================== +--- linux-2.6.27.orig/include/acpi/processor.h ++++ linux-2.6.27/include/acpi/processor.h +@@ -17,6 +17,12 @@ + #define ACPI_PROCESSOR_MAX_THROTTLE 250 /* 25% */ + #define ACPI_PROCESSOR_MAX_DUTY_WIDTH 4 + ++#ifdef CONFIG_XEN ++#define NR_ACPI_CPUS (NR_CPUS < 256 ? 256 : NR_CPUS) ++#else ++#define NR_ACPI_CPUS NR_CPUS ++#endif /* CONFIG_XEN */ ++ + #define ACPI_PDC_REVISION_ID 0x1 + + #define ACPI_PSD_REV0_REVISION 0 /* Support for _PSD as in ACPI 3.0 */ +@@ -42,6 +48,17 @@ + + struct acpi_processor_cx; + ++#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL ++struct acpi_csd_package { ++ acpi_integer num_entries; ++ acpi_integer revision; ++ acpi_integer domain; ++ acpi_integer coord_type; ++ acpi_integer num_processors; ++ acpi_integer index; ++} __attribute__ ((packed)); ++#endif ++ + struct acpi_power_register { + u8 descriptor; + u16 length; +@@ -74,6 +91,12 @@ struct acpi_processor_cx { + u32 power; + u32 usage; + u64 time; ++#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL ++ /* Require raw information for external control logic */ ++ struct acpi_power_register reg; ++ u32 csd_count; ++ struct acpi_csd_package *domain_info; ++#endif + struct acpi_processor_cx_policy promotion; + struct acpi_processor_cx_policy demotion; + char desc[ACPI_CX_DESC_LEN]; +@@ -304,6 +327,9 @@ static inline void acpi_processor_ppc_ex + { + return; + } ++#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL ++int acpi_processor_ppc_has_changed(struct acpi_processor *pr); ++#else + static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr) + { + static unsigned int printout = 1; +@@ -316,6 +342,7 @@ static inline int acpi_processor_ppc_has + } + return 0; + } ++#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */ + #endif /* CONFIG_CPU_FREQ */ + + /* in 
processor_throttling.c */ +@@ -352,4 +379,120 @@ static inline void acpi_thermal_cpufreq_ + } + #endif + ++/* ++ * Following are interfaces geared to external processor PM control ++ * logic like a VMM ++ */ ++/* Events notified to external control logic */ ++#define PROCESSOR_PM_INIT 1 ++#define PROCESSOR_PM_CHANGE 2 ++#define PROCESSOR_HOTPLUG 3 ++ ++/* Objects for the PM events */ ++#define PM_TYPE_IDLE 0 ++#define PM_TYPE_PERF 1 ++#define PM_TYPE_THR 2 ++#define PM_TYPE_MAX 3 ++ ++/* Processor hotplug events */ ++#define HOTPLUG_TYPE_ADD 0 ++#define HOTPLUG_TYPE_REMOVE 1 ++ ++#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL ++struct processor_extcntl_ops { ++ /* Transfer processor PM events to external control logic */ ++ int (*pm_ops[PM_TYPE_MAX])(struct acpi_processor *pr, int event); ++ /* Notify physical processor status to external control logic */ ++ int (*hotplug)(struct acpi_processor *pr, int type); ++}; ++extern const struct processor_extcntl_ops *processor_extcntl_ops; ++ ++static inline int processor_cntl_external(void) ++{ ++ return (processor_extcntl_ops != NULL); ++} ++ ++static inline int processor_pm_external(void) ++{ ++ return processor_cntl_external() && ++ (processor_extcntl_ops->pm_ops[PM_TYPE_IDLE] != NULL); ++} ++ ++static inline int processor_pmperf_external(void) ++{ ++ return processor_cntl_external() && ++ (processor_extcntl_ops->pm_ops[PM_TYPE_PERF] != NULL); ++} ++ ++static inline int processor_pmthr_external(void) ++{ ++ return processor_cntl_external() && ++ (processor_extcntl_ops->pm_ops[PM_TYPE_THR] != NULL); ++} ++ ++extern int processor_notify_external(struct acpi_processor *pr, ++ int event, int type); ++extern void processor_extcntl_init(void); ++extern int processor_extcntl_prepare(struct acpi_processor *pr); ++extern int acpi_processor_get_performance_info(struct acpi_processor *pr); ++extern int acpi_processor_get_psd(struct acpi_processor *pr); ++void arch_acpi_processor_init_extcntl(const struct processor_extcntl_ops **); 
++#else ++static inline int processor_cntl_external(void) {return 0;} ++static inline int processor_pm_external(void) {return 0;} ++static inline int processor_pmperf_external(void) {return 0;} ++static inline int processor_pmthr_external(void) {return 0;} ++static inline int processor_notify_external(struct acpi_processor *pr, ++ int event, int type) ++{ ++ return 0; ++} ++static inline void processor_extcntl_init(void) {} ++static inline int processor_extcntl_prepare(struct acpi_processor *pr) ++{ ++ return 0; ++} ++#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */ ++ ++#ifdef CONFIG_XEN ++static inline void xen_convert_pct_reg(struct xen_pct_register *xpct, ++ struct acpi_pct_register *apct) ++{ ++ xpct->descriptor = apct->descriptor; ++ xpct->length = apct->length; ++ xpct->space_id = apct->space_id; ++ xpct->bit_width = apct->bit_width; ++ xpct->bit_offset = apct->bit_offset; ++ xpct->reserved = apct->reserved; ++ xpct->address = apct->address; ++} ++ ++static inline void xen_convert_pss_states(struct xen_processor_px *xpss, ++ struct acpi_processor_px *apss, int state_count) ++{ ++ int i; ++ for(i=0; icore_frequency = apss->core_frequency; ++ xpss->power = apss->power; ++ xpss->transition_latency = apss->transition_latency; ++ xpss->bus_master_latency = apss->bus_master_latency; ++ xpss->control = apss->control; ++ xpss->status = apss->status; ++ xpss++; ++ apss++; ++ } ++} ++ ++static inline void xen_convert_psd_pack(struct xen_psd_package *xpsd, ++ struct acpi_psd_package *apsd) ++{ ++ xpsd->num_entries = apsd->num_entries; ++ xpsd->revision = apsd->revision; ++ xpsd->domain = apsd->domain; ++ xpsd->coord_type = apsd->coord_type; ++ xpsd->num_processors = apsd->num_processors; ++} ++ ++#endif /* CONFIG_XEN */ ++ + #endif +Index: linux-2.6.27/include/asm-generic/pci.h +=================================================================== +--- linux-2.6.27.orig/include/asm-generic/pci.h ++++ linux-2.6.27/include/asm-generic/pci.h +@@ -43,7 +43,9 @@ 
pcibios_select_root(struct pci_dev *pdev + return root; + } + ++#ifndef pcibios_scan_all_fns + #define pcibios_scan_all_fns(a, b) 0 ++#endif + + #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ + static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) +Index: linux-2.6.27/include/asm-generic/pgtable.h +=================================================================== +--- linux-2.6.27.orig/include/asm-generic/pgtable.h ++++ linux-2.6.27/include/asm-generic/pgtable.h +@@ -99,6 +99,10 @@ static inline void ptep_set_wrprotect(st + } + #endif + ++#ifndef arch_change_pte_range ++#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0 ++#endif ++ + #ifndef __HAVE_ARCH_PTE_SAME + #define pte_same(A,B) (pte_val(A) == pte_val(B)) + #endif +Index: linux-2.6.27/include/linux/aio.h +=================================================================== +--- linux-2.6.27.orig/include/linux/aio.h ++++ linux-2.6.27/include/linux/aio.h +@@ -199,6 +199,11 @@ struct kioctx { + struct aio_ring_info ring_info; + + struct delayed_work wq; ++#ifdef CONFIG_EPOLL ++ // poll integration ++ wait_queue_head_t poll_wait; ++ struct file *file; ++#endif + }; + + /* prototypes */ +Index: linux-2.6.27/include/linux/highmem.h +=================================================================== +--- linux-2.6.27.orig/include/linux/highmem.h ++++ linux-2.6.27/include/linux/highmem.h +@@ -62,6 +62,7 @@ static inline void *kmap_atomic(struct p + + #endif /* CONFIG_HIGHMEM */ + ++#ifndef __HAVE_ARCH_CLEAR_USER_HIGHPAGE + /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ + static inline void clear_user_highpage(struct page *page, unsigned long vaddr) + { +@@ -69,6 +70,7 @@ static inline void clear_user_highpage(s + clear_user_page(addr, vaddr, page); + kunmap_atomic(addr, KM_USER0); + } ++#endif + + #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + /** +@@ -115,12 +117,14 @@ alloc_zeroed_user_highpage_movable(struc + return __alloc_zeroed_user_highpage(__GFP_MOVABLE, 
vma, vaddr); + } + ++#ifndef __HAVE_ARCH_CLEAR_HIGHPAGE + static inline void clear_highpage(struct page *page) + { + void *kaddr = kmap_atomic(page, KM_USER0); + clear_page(kaddr); + kunmap_atomic(kaddr, KM_USER0); + } ++#endif + + static inline void zero_user_segments(struct page *page, + unsigned start1, unsigned end1, +@@ -174,6 +178,8 @@ static inline void copy_user_highpage(st + + #endif + ++#ifndef __HAVE_ARCH_COPY_HIGHPAGE ++ + static inline void copy_highpage(struct page *to, struct page *from) + { + char *vfrom, *vto; +@@ -185,4 +191,6 @@ static inline void copy_highpage(struct + kunmap_atomic(vto, KM_USER1); + } + ++#endif ++ + #endif /* _LINUX_HIGHMEM_H */ +Index: linux-2.6.27/include/linux/interrupt.h +=================================================================== +--- linux-2.6.27.orig/include/linux/interrupt.h ++++ linux-2.6.27/include/linux/interrupt.h +@@ -218,6 +218,12 @@ static inline int disable_irq_wake(unsig + } + #endif /* CONFIG_GENERIC_HARDIRQS */ + ++#ifdef CONFIG_HAVE_IRQ_IGNORE_UNHANDLED ++int irq_ignore_unhandled(unsigned int irq); ++#else ++#define irq_ignore_unhandled(irq) 0 ++#endif ++ + #ifndef __ARCH_SET_SOFTIRQ_PENDING + #define set_softirq_pending(x) (local_softirq_pending() = (x)) + #define or_softirq_pending(x) (local_softirq_pending() |= (x)) +Index: linux-2.6.27/include/linux/kexec.h +=================================================================== +--- linux-2.6.27.orig/include/linux/kexec.h ++++ linux-2.6.27/include/linux/kexec.h +@@ -46,6 +46,13 @@ + KEXEC_CORE_NOTE_NAME_BYTES + \ + KEXEC_CORE_NOTE_DESC_BYTES ) + ++#ifndef KEXEC_ARCH_HAS_PAGE_MACROS ++#define kexec_page_to_pfn(page) page_to_pfn(page) ++#define kexec_pfn_to_page(pfn) pfn_to_page(pfn) ++#define kexec_virt_to_phys(addr) virt_to_phys(addr) ++#define kexec_phys_to_virt(addr) phys_to_virt(addr) ++#endif ++ + /* + * This structure is used to hold the arguments that are used when loading + * kernel binaries. 
+@@ -108,6 +115,12 @@ struct kimage { + extern void machine_kexec(struct kimage *image); + extern int machine_kexec_prepare(struct kimage *image); + extern void machine_kexec_cleanup(struct kimage *image); ++#ifdef CONFIG_XEN ++extern int xen_machine_kexec_load(struct kimage *image); ++extern void xen_machine_kexec_unload(struct kimage *image); ++extern void xen_machine_kexec_setup_resources(void); ++extern void xen_machine_kexec_register_resources(struct resource *res); ++#endif + extern asmlinkage long sys_kexec_load(unsigned long entry, + unsigned long nr_segments, + struct kexec_segment __user *segments, +Index: linux-2.6.27/include/linux/mm.h +=================================================================== +--- linux-2.6.27.orig/include/linux/mm.h ++++ linux-2.6.27/include/linux/mm.h +@@ -113,6 +113,9 @@ extern unsigned int kobjsize(const void + #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ + #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ + #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ ++#ifdef CONFIG_XEN ++#define VM_FOREIGN 0x40000000 /* Has pages belonging to another VM */ ++#endif + #define VM_PAGE_MKWRITE2 0x80000000 /* Uses page_mkwrite2 rather than page_mkwrite */ + + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ +@@ -194,6 +197,11 @@ struct vm_operations_struct { + */ + int (*access)(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write); ++ ++ /* Area-specific function for clearing the PTE at @ptep. Returns the ++ * original value of @ptep. 
*/ ++ pte_t (*zap_pte)(struct vm_area_struct *vma, ++ unsigned long addr, pte_t *ptep, int is_fullmm); + #ifdef CONFIG_NUMA + /* + * set_policy() op must add a reference to any non-NULL @new mempolicy +Index: linux-2.6.27/include/linux/oprofile.h +=================================================================== +--- linux-2.6.27.orig/include/linux/oprofile.h ++++ linux-2.6.27/include/linux/oprofile.h +@@ -16,6 +16,8 @@ + #include + #include + #include ++ ++#include + + /* Each escaped entry is prefixed by ESCAPE_CODE + * then one of the following codes, then the +@@ -28,7 +30,7 @@ + #define CPU_SWITCH_CODE 2 + #define COOKIE_SWITCH_CODE 3 + #define KERNEL_ENTER_SWITCH_CODE 4 +-#define KERNEL_EXIT_SWITCH_CODE 5 ++#define USER_ENTER_SWITCH_CODE 5 + #define MODULE_LOADED_CODE 6 + #define CTX_TGID_CODE 7 + #define TRACE_BEGIN_CODE 8 +@@ -36,6 +38,7 @@ + #define XEN_ENTER_SWITCH_CODE 10 + #define SPU_PROFILING_CODE 11 + #define SPU_CTX_SWITCH_CODE 12 ++#define DOMAIN_SWITCH_CODE 13 + + struct super_block; + struct dentry; +@@ -47,6 +50,11 @@ struct oprofile_operations { + /* create any necessary configuration files in the oprofile fs. + * Optional. */ + int (*create_files)(struct super_block * sb, struct dentry * root); ++ /* setup active domains with Xen */ ++ int (*set_active)(int *active_domains, unsigned int adomains); ++ /* setup passive domains with Xen */ ++ int (*set_passive)(int *passive_domains, unsigned int pdomains); ++ + /* Do any necessary interrupt setup. Optional. */ + int (*setup)(void); + /* Do any necessary interrupt shutdown. Optional. 
*/ +@@ -106,6 +114,8 @@ void oprofile_add_pc(unsigned long pc, i + /* add a backtrace entry, to be called from the ->backtrace callback */ + void oprofile_add_trace(unsigned long eip); + ++/* add a domain switch entry */ ++int oprofile_add_domain_switch(int32_t domain_id); + + /** + * Create a file of the given name as a child of the given root, with +Index: linux-2.6.27/include/linux/page-flags.h +=================================================================== +--- linux-2.6.27.orig/include/linux/page-flags.h ++++ linux-2.6.27/include/linux/page-flags.h +@@ -98,6 +98,9 @@ enum pageflags { + #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR + PG_uncached, /* Page has been mapped as uncached */ + #endif ++#ifdef CONFIG_XEN ++ PG_foreign, /* Page is owned by foreign allocator. */ ++#endif + __NR_PAGEFLAGS, + + /* Filesystems */ +@@ -271,6 +274,19 @@ static inline void SetPageUptodate(struc + + CLEARPAGEFLAG(Uptodate, uptodate) + ++#define PageForeign(page) test_bit(PG_foreign, &(page)->flags) ++#define SetPageForeign(_page, dtor) do { \ ++ set_bit(PG_foreign, &(_page)->flags); \ ++ BUG_ON((dtor) == (void (*)(struct page *))0); \ ++ (_page)->index = (long)(dtor); \ ++} while (0) ++#define ClearPageForeign(page) do { \ ++ clear_bit(PG_foreign, &(page)->flags); \ ++ (page)->index = 0; \ ++} while (0) ++#define PageForeignDestructor(_page) \ ++ ((void (*)(struct page *))(_page)->index)(_page) ++ + extern void cancel_dirty_page(struct page *page, unsigned int account_size); + + int test_clear_page_writeback(struct page *page); +@@ -341,9 +357,18 @@ PAGEFLAG(MemError, memerror) + PAGEFLAG_FALSE(MemError) + #endif + ++#if !defined(CONFIG_XEN) ++# define PAGE_FLAGS_XEN 0 ++#elif defined(CONFIG_X86) ++# define PAGE_FLAGS_XEN ((1 << PG_pinned) | (1 << PG_foreign)) ++#else ++# define PAGE_FLAGS_XEN (1 << PG_foreign) ++#endif ++ + #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ + 1 << PG_buddy | 1 << PG_writeback | 1 << PG_waiters | \ +- 1 << PG_slab | 1 << 
PG_swapcache | 1 << PG_active) ++ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ ++ PAGE_FLAGS_XEN) + + /* + * Flags checked in bad_page(). Pages on the free list should not have +Index: linux-2.6.27/include/linux/pci.h +=================================================================== +--- linux-2.6.27.orig/include/linux/pci.h ++++ linux-2.6.27/include/linux/pci.h +@@ -211,6 +211,9 @@ struct pci_dev { + * directly, use the values stored here. They might be different! + */ + unsigned int irq; ++#ifdef CONFIG_XEN ++ unsigned int irq_old; ++#endif + struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ + + /* These fields are used by common fixups */ +@@ -772,6 +775,11 @@ static inline int pci_msi_enabled(void) + { + return 0; + } ++ ++#ifdef CONFIG_XEN ++#define register_msi_get_owner(func) 0 ++#define unregister_msi_get_owner(func) 0 ++#endif + #else + extern int pci_enable_msi(struct pci_dev *dev); + extern void pci_msi_shutdown(struct pci_dev *dev); +@@ -784,6 +792,10 @@ extern void msi_remove_pci_irq_vectors(s + extern void pci_restore_msi_state(struct pci_dev *dev); + extern int pci_msi_enabled(void); + ++#ifdef CONFIG_XEN ++extern int register_msi_get_owner(int (*func)(struct pci_dev *dev)); ++extern int unregister_msi_get_owner(int (*func)(struct pci_dev *dev)); ++#endif + #endif + + #ifndef CONFIG_PCIEASPM +Index: linux-2.6.27/include/linux/skbuff.h +=================================================================== +--- linux-2.6.27.orig/include/linux/skbuff.h ++++ linux-2.6.27/include/linux/skbuff.h +@@ -217,6 +217,8 @@ typedef unsigned char *sk_buff_data_t; + * @local_df: allow local fragmentation + * @cloned: Head may be cloned (check refcnt to be sure) + * @nohdr: Payload reference only, must not modify header ++ * @proto_data_valid: Protocol data validated since arriving at localhost ++ * @proto_csum_blank: Protocol csum must be added before leaving localhost + * @pkt_type: Packet class + * @fclone: 
skbuff clone status + * @ip_summed: Driver fed us an IP checksum +@@ -323,7 +325,11 @@ struct sk_buff { + #ifdef CONFIG_NETVM + __u8 emergency:1; + #endif +- /* 12-16 bit hole */ ++#ifdef CONFIG_XEN ++ __u8 proto_data_valid:1, ++ proto_csum_blank:1; ++#endif ++ /* 10-16 bit hole */ + + #ifdef CONFIG_NET_DMA + dma_cookie_t dma_cookie; +Index: linux-2.6.27/include/linux/vermagic.h +=================================================================== +--- linux-2.6.27.orig/include/linux/vermagic.h ++++ linux-2.6.27/include/linux/vermagic.h +@@ -22,6 +22,11 @@ + #else + #define MODULE_VERMAGIC_MODVERSIONS "" + #endif ++#ifdef CONFIG_XEN ++#define MODULE_VERMAGIC_XEN "Xen " ++#else ++#define MODULE_VERMAGIC_XEN ++#endif + #ifndef MODULE_ARCH_VERMAGIC + #define MODULE_ARCH_VERMAGIC "" + #endif +@@ -30,5 +35,5 @@ + UTS_RELEASE " " \ + MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ + MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ +- MODULE_ARCH_VERMAGIC ++ MODULE_VERMAGIC_XEN MODULE_ARCH_VERMAGIC + +Index: linux-2.6.27/kernel/irq/spurious.c +=================================================================== +--- linux-2.6.27.orig/kernel/irq/spurious.c ++++ linux-2.6.27/kernel/irq/spurious.c +@@ -193,7 +193,7 @@ void note_interrupt(unsigned int irq, st + */ + if (time_after(jiffies, desc->last_unhandled + HZ/10)) + desc->irqs_unhandled = 1; +- else ++ else if (!irq_ignore_unhandled(irq)) + desc->irqs_unhandled++; + desc->last_unhandled = jiffies; + if (unlikely(action_ret != IRQ_NONE)) +Index: linux-2.6.27/kernel/kexec.c +=================================================================== +--- linux-2.6.27.orig/kernel/kexec.c ++++ linux-2.6.27/kernel/kexec.c +@@ -359,13 +359,26 @@ static int kimage_is_destination_range(s + return 0; + } + +-static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) ++static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order, unsigned long limit) + { + struct page *pages; + + pages = 
alloc_pages(gfp_mask, order); + if (pages) { + unsigned int count, i; ++#ifdef CONFIG_XEN ++ int address_bits; ++ ++ if (limit == ~0UL) ++ address_bits = BITS_PER_LONG; ++ else ++ address_bits = long_log2(limit); ++ ++ if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) { ++ __free_pages(pages, order); ++ return NULL; ++ } ++#endif + pages->mapping = NULL; + set_page_private(pages, order); + count = 1 << order; +@@ -384,6 +397,9 @@ static void kimage_free_pages(struct pag + count = 1 << order; + for (i = 0; i < count; i++) + ClearPageReserved(page + i); ++#ifdef CONFIG_XEN ++ xen_destroy_contiguous_region((unsigned long)page_address(page), order); ++#endif + __free_pages(page, order); + } + +@@ -429,10 +445,10 @@ static struct page *kimage_alloc_normal_ + do { + unsigned long pfn, epfn, addr, eaddr; + +- pages = kimage_alloc_pages(GFP_KERNEL, order); ++ pages = kimage_alloc_pages(GFP_KERNEL, order, KEXEC_CONTROL_MEMORY_LIMIT); + if (!pages) + break; +- pfn = page_to_pfn(pages); ++ pfn = kexec_page_to_pfn(pages); + epfn = pfn + count; + addr = pfn << PAGE_SHIFT; + eaddr = epfn << PAGE_SHIFT; +@@ -466,6 +482,7 @@ static struct page *kimage_alloc_normal_ + return pages; + } + ++#ifndef CONFIG_XEN + static struct page *kimage_alloc_crash_control_pages(struct kimage *image, + unsigned int order) + { +@@ -519,7 +536,7 @@ static struct page *kimage_alloc_crash_c + } + /* If I don't overlap any segments I have found my hole! 
*/ + if (i == image->nr_segments) { +- pages = pfn_to_page(hole_start >> PAGE_SHIFT); ++ pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT); + break; + } + } +@@ -546,6 +563,13 @@ struct page *kimage_alloc_control_pages( + + return pages; + } ++#else /* !CONFIG_XEN */ ++struct page *kimage_alloc_control_pages(struct kimage *image, ++ unsigned int order) ++{ ++ return kimage_alloc_normal_control_pages(image, order); ++} ++#endif + + static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) + { +@@ -561,7 +585,7 @@ static int kimage_add_entry(struct kimag + return -ENOMEM; + + ind_page = page_address(page); +- *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; ++ *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION; + image->entry = ind_page; + image->last_entry = ind_page + + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); +@@ -620,13 +644,13 @@ static void kimage_terminate(struct kima + #define for_each_kimage_entry(image, ptr, entry) \ + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ + ptr = (entry & IND_INDIRECTION)? \ +- phys_to_virt((entry & PAGE_MASK)): ptr +1) ++ kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1) + + static void kimage_free_entry(kimage_entry_t entry) + { + struct page *page; + +- page = pfn_to_page(entry >> PAGE_SHIFT); ++ page = kexec_pfn_to_page(entry >> PAGE_SHIFT); + kimage_free_pages(page); + } + +@@ -638,6 +662,10 @@ static void kimage_free(struct kimage *i + if (!image) + return; + ++#ifdef CONFIG_XEN ++ xen_machine_kexec_unload(image); ++#endif ++ + kimage_free_extra_pages(image); + for_each_kimage_entry(image, ptr, entry) { + if (entry & IND_INDIRECTION) { +@@ -713,7 +741,7 @@ static struct page *kimage_alloc_page(st + * have a match. 
+ */ + list_for_each_entry(page, &image->dest_pages, lru) { +- addr = page_to_pfn(page) << PAGE_SHIFT; ++ addr = kexec_page_to_pfn(page) << PAGE_SHIFT; + if (addr == destination) { + list_del(&page->lru); + return page; +@@ -724,16 +752,16 @@ static struct page *kimage_alloc_page(st + kimage_entry_t *old; + + /* Allocate a page, if we run out of memory give up */ +- page = kimage_alloc_pages(gfp_mask, 0); ++ page = kimage_alloc_pages(gfp_mask, 0, KEXEC_SOURCE_MEMORY_LIMIT); + if (!page) + return NULL; + /* If the page cannot be used file it away */ +- if (page_to_pfn(page) > ++ if (kexec_page_to_pfn(page) > + (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { + list_add(&page->lru, &image->unuseable_pages); + continue; + } +- addr = page_to_pfn(page) << PAGE_SHIFT; ++ addr = kexec_page_to_pfn(page) << PAGE_SHIFT; + + /* If it is the destination page we want use it */ + if (addr == destination) +@@ -756,7 +784,7 @@ static struct page *kimage_alloc_page(st + struct page *old_page; + + old_addr = *old & PAGE_MASK; +- old_page = pfn_to_page(old_addr >> PAGE_SHIFT); ++ old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT); + copy_highpage(page, old_page); + *old = addr | (*old & ~PAGE_MASK); + +@@ -812,7 +840,7 @@ static int kimage_load_normal_segment(st + result = -ENOMEM; + goto out; + } +- result = kimage_add_page(image, page_to_pfn(page) ++ result = kimage_add_page(image, kexec_page_to_pfn(page) + << PAGE_SHIFT); + if (result < 0) + goto out; +@@ -844,6 +872,7 @@ out: + return result; + } + ++#ifndef CONFIG_XEN + static int kimage_load_crash_segment(struct kimage *image, + struct kexec_segment *segment) + { +@@ -866,7 +895,7 @@ static int kimage_load_crash_segment(str + char *ptr; + size_t uchunk, mchunk; + +- page = pfn_to_page(maddr >> PAGE_SHIFT); ++ page = kexec_pfn_to_page(maddr >> PAGE_SHIFT); + if (!page) { + result = -ENOMEM; + goto out; +@@ -915,6 +944,13 @@ static int kimage_load_segment(struct ki + + return result; + } ++#else /* CONFIG_XEN */ ++static int 
kimage_load_segment(struct kimage *image, ++ struct kexec_segment *segment) ++{ ++ return kimage_load_normal_segment(image, segment); ++} ++#endif + + /* + * Exec Kernel system call: for obvious reasons only root may call it. +@@ -1018,6 +1054,13 @@ SYSCALL_DEFINE4(kexec_load, unsigned lon + } + kimage_terminate(image); + } ++#ifdef CONFIG_XEN ++ if (image) { ++ result = xen_machine_kexec_load(image); ++ if (result) ++ goto out; ++ } ++#endif + /* Install the new kernel, and Uninstall the old */ + image = xchg(dest_image, image); + +Index: linux-2.6.27/kernel/sysctl.c +=================================================================== +--- linux-2.6.27.orig/kernel/sysctl.c ++++ linux-2.6.27/kernel/sysctl.c +@@ -751,7 +751,7 @@ static struct ctl_table kern_table[] = { + .proc_handler = &proc_dointvec, + }, + #endif +-#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) ++#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) && !defined(CONFIG_ACPI_PV_SLEEP) + { + .procname = "acpi_video_flags", + .data = &acpi_realmode_flags, +Index: linux-2.6.27/mm/memory.c +=================================================================== +--- linux-2.6.27.orig/mm/memory.c ++++ linux-2.6.27/mm/memory.c +@@ -446,6 +446,12 @@ struct page *vm_normal_page(struct vm_ar + { + unsigned long pfn; + ++#if defined(CONFIG_XEN) && defined(CONFIG_X86) ++ /* XEN: Covers user-space grant mappings (even of local pages). */ ++ if (unlikely(vma->vm_flags & VM_FOREIGN)) ++ return NULL; ++#endif ++ + if (HAVE_PTE_SPECIAL) { + if (likely(!pte_special(pte))) { + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); +@@ -474,7 +480,14 @@ struct page *vm_normal_page(struct vm_ar + } + } + ++#ifndef CONFIG_XEN + VM_BUG_ON(!pfn_valid(pfn)); ++#else ++ if (unlikely(!pfn_valid(pfn))) { ++ VM_BUG_ON(!(vma->vm_flags & VM_RESERVED)); ++ return NULL; ++ } ++#endif + + /* + * NOTE! We still have PageReserved() pages in the page tables. 
+@@ -745,8 +758,12 @@ static unsigned long zap_pte_range(struc + page->index > details->last_index)) + continue; + } +- ptent = ptep_get_and_clear_full(mm, addr, pte, +- tlb->fullmm); ++ if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte)) ++ ptent = vma->vm_ops->zap_pte(vma, addr, pte, ++ tlb->fullmm); ++ else ++ ptent = ptep_get_and_clear_full(mm, addr, pte, ++ tlb->fullmm); + tlb_remove_tlb_entry(tlb, pte, addr); + if (unlikely(!page)) + continue; +@@ -996,6 +1013,7 @@ unsigned long zap_page_range(struct vm_a + tlb_finish_mmu(tlb, address, end); + return end; + } ++EXPORT_SYMBOL(zap_page_range); + + /** + * zap_vma_ptes - remove ptes mapping the vma +@@ -1193,6 +1211,26 @@ int get_user_pages(struct task_struct *t + continue; + } + ++#ifdef CONFIG_XEN ++ if (vma && (vma->vm_flags & VM_FOREIGN)) { ++ struct page **map = vma->vm_private_data; ++ int offset = (start - vma->vm_start) >> PAGE_SHIFT; ++ if (map[offset] != NULL) { ++ if (pages) { ++ struct page *page = map[offset]; ++ ++ pages[i] = page; ++ get_page(page); ++ } ++ if (vmas) ++ vmas[i] = vma; ++ i++; ++ start += PAGE_SIZE; ++ len--; ++ continue; ++ } ++ } ++#endif + if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP)) + || !(vm_flags & vma->vm_flags)) + return i ? 
: -EFAULT; +Index: linux-2.6.27/mm/mprotect.c +=================================================================== +--- linux-2.6.27.orig/mm/mprotect.c ++++ linux-2.6.27/mm/mprotect.c +@@ -92,6 +92,8 @@ static inline void change_pmd_range(stru + next = pmd_addr_end(addr, end); + if (pmd_none_or_clear_bad(pmd)) + continue; ++ if (arch_change_pte_range(mm, pmd, addr, next, newprot)) ++ continue; + change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable); + } while (pmd++, addr = next, addr != end); + } +Index: linux-2.6.27/mm/page_alloc.c +=================================================================== +--- linux-2.6.27.orig/mm/page_alloc.c ++++ linux-2.6.27/mm/page_alloc.c +@@ -533,6 +533,12 @@ static void __free_pages_ok(struct page + int i; + int reserved = 0; + ++#ifdef CONFIG_XEN ++ if (PageForeign(page)) { ++ PageForeignDestructor(page); ++ return; ++ } ++#endif + trace_page_free(page, order); + + for (i = 0 ; i < (1 << order) ; ++i) +@@ -995,6 +1001,12 @@ static void free_hot_cold_page(struct pa + struct per_cpu_pages *pcp; + unsigned long flags; + ++#ifdef CONFIG_XEN ++ if (PageForeign(page)) { ++ PageForeignDestructor(page); ++ return; ++ } ++#endif + trace_page_free(page, 0); + + if (PageAnon(page)) +Index: linux-2.6.27/net/core/dev.c +=================================================================== +--- linux-2.6.27.orig/net/core/dev.c ++++ linux-2.6.27/net/core/dev.c +@@ -131,6 +131,12 @@ + + #include "net-sysfs.h" + ++#ifdef CONFIG_XEN ++#include ++#include ++#include ++#endif ++ + /* + * The list of packet types we will receive (as opposed to discard) + * and the routines to invoke. 
+@@ -1734,6 +1740,42 @@ static struct netdev_queue *dev_pick_tx( + return netdev_get_tx_queue(dev, queue_index); + } + ++#ifdef CONFIG_XEN ++inline int skb_checksum_setup(struct sk_buff *skb) ++{ ++ if (skb->proto_csum_blank) { ++ if (skb->protocol != htons(ETH_P_IP)) ++ goto out; ++ skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl; ++ if (skb->h.raw >= skb->tail) ++ goto out; ++ switch (skb->nh.iph->protocol) { ++ case IPPROTO_TCP: ++ skb->csum = offsetof(struct tcphdr, check); ++ break; ++ case IPPROTO_UDP: ++ skb->csum = offsetof(struct udphdr, check); ++ break; ++ default: ++ if (net_ratelimit()) ++ printk(KERN_ERR "Attempting to checksum a non-" ++ "TCP/UDP packet, dropping a protocol" ++ " %d packet", skb->nh.iph->protocol); ++ goto out; ++ } ++ if ((skb->h.raw + skb->csum + 2) > skb->tail) ++ goto out; ++ skb->ip_summed = CHECKSUM_HW; ++ skb->proto_csum_blank = 0; ++ } ++ return 0; ++out: ++ return -EPROTO; ++} ++#else ++inline int skb_checksum_setup(struct sk_buff *skb) { return 0; } ++#endif ++ + /** + * dev_queue_xmit - transmit a buffer + * @skb: buffer to transmit +@@ -1766,6 +1808,12 @@ int dev_queue_xmit(struct sk_buff *skb) + struct Qdisc *q; + int rc = -ENOMEM; + ++ /* If a checksum-deferred packet is forwarded to a device that needs a ++ * checksum, correct the pointers and force checksumming. ++ */ ++ if (skb_checksum_setup(skb)) ++ goto out_kfree_skb; ++ + /* GSO will handle the following emulations directly. */ + if (netif_needs_gso(dev, skb)) + goto gso; +@@ -2274,6 +2322,19 @@ int netif_receive_skb(struct sk_buff *sk + } + #endif + ++#ifdef CONFIG_XEN ++ switch (skb->ip_summed) { ++ case CHECKSUM_UNNECESSARY: ++ skb->proto_data_valid = 1; ++ break; ++ case CHECKSUM_HW: ++ /* XXX Implement me. 
*/ ++ default: ++ skb->proto_data_valid = 0; ++ break; ++ } ++#endif ++ + if (skb_emergency(skb)) + goto skip_taps; + +@@ -4928,6 +4989,7 @@ EXPORT_SYMBOL(unregister_netdevice_notif + EXPORT_SYMBOL(net_enable_timestamp); + EXPORT_SYMBOL(net_disable_timestamp); + EXPORT_SYMBOL(dev_get_flags); ++EXPORT_SYMBOL(skb_checksum_setup); + + #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) + EXPORT_SYMBOL(br_handle_frame_hook); +Index: linux-2.6.27/net/core/skbuff.c +=================================================================== +--- linux-2.6.27.orig/net/core/skbuff.c ++++ linux-2.6.27/net/core/skbuff.c +@@ -555,6 +555,10 @@ static struct sk_buff *__skb_clone(struc + n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; + n->cloned = 1; + n->nohdr = 0; ++#ifdef CONFIG_XEN ++ C(proto_data_valid); ++ C(proto_csum_blank); ++#endif + n->destructor = NULL; + C(iif); + C(tail); +Index: linux-2.6.27/net/ipv4/netfilter/nf_nat_proto_tcp.c +=================================================================== +--- linux-2.6.27.orig/net/ipv4/netfilter/nf_nat_proto_tcp.c ++++ linux-2.6.27/net/ipv4/netfilter/nf_nat_proto_tcp.c +@@ -75,6 +75,9 @@ tcp_manip_pkt(struct sk_buff *skb, + if (hdrsize < sizeof(*hdr)) + return true; + ++ if (skb_checksum_setup(skb)) ++ return false; ++ + inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); + inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); + return true; +Index: linux-2.6.27/net/ipv4/netfilter/nf_nat_proto_udp.c +=================================================================== +--- linux-2.6.27.orig/net/ipv4/netfilter/nf_nat_proto_udp.c ++++ linux-2.6.27/net/ipv4/netfilter/nf_nat_proto_udp.c +@@ -60,6 +60,10 @@ udp_manip_pkt(struct sk_buff *skb, + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } ++ ++ if (skb_checksum_setup(skb)) ++ return false; ++ + if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { + inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); + 
inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, +Index: linux-2.6.27/net/ipv4/xfrm4_output.c +=================================================================== +--- linux-2.6.27.orig/net/ipv4/xfrm4_output.c ++++ linux-2.6.27/net/ipv4/xfrm4_output.c +@@ -81,7 +81,7 @@ static int xfrm4_output_finish(struct sk + #endif + + skb->protocol = htons(ETH_P_IP); +- return xfrm_output(skb); ++ return skb_checksum_setup(skb) ?: xfrm_output(skb); + } + + int xfrm4_output(struct sk_buff *skb) +Index: linux-2.6.27/scripts/Makefile.build +=================================================================== +--- linux-2.6.27.orig/scripts/Makefile.build ++++ linux-2.6.27/scripts/Makefile.build +@@ -73,6 +73,20 @@ ifndef obj + $(warning kbuild: Makefile.build is included improperly) + endif + ++ifeq ($(CONFIG_XEN),y) ++$(objtree)/scripts/Makefile.xen: $(srctree)/scripts/Makefile.xen.awk $(srctree)/scripts/Makefile.build ++ @echo ' Updating $@' ++ $(if $(shell echo a | $(AWK) '{ print gensub(/a/, "AA", "g"); }'),\ ++ ,$(error 'Your awk program does not define gensub. 
Use gawk or another awk with gensub')) ++ @$(AWK) -f $< $(filter-out $<,$^) >$@ ++ ++xen-src-single-used-m := $(patsubst $(srctree)/%,%,$(wildcard $(addprefix $(srctree)/,$(single-used-m:.o=-xen.c)))) ++xen-single-used-m := $(xen-src-single-used-m:-xen.c=.o) ++single-used-m := $(filter-out $(xen-single-used-m),$(single-used-m)) ++ ++-include $(objtree)/scripts/Makefile.xen ++endif ++ + # =========================================================================== + + ifneq ($(strip $(lib-y) $(lib-m) $(lib-n) $(lib-)),) +Index: linux-2.6.27/scripts/Makefile.lib +=================================================================== +--- linux-2.6.27.orig/scripts/Makefile.lib ++++ linux-2.6.27/scripts/Makefile.lib +@@ -17,6 +17,12 @@ obj-m := $(filter-out $(obj-y),$(obj-m)) + + lib-y := $(filter-out $(obj-y), $(sort $(lib-y) $(lib-m))) + ++# Remove objects forcibly disabled ++ ++obj-y := $(filter-out $(disabled-obj-y),$(obj-y)) ++obj-m := $(filter-out $(disabled-obj-y),$(obj-m)) ++lib-y := $(filter-out $(disabled-obj-y),$(lib-y)) ++ + + # Handle objects in subdirs + # --------------------------------------------------------------------------- diff --git a/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-include-xen-interface.diff b/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-include-xen-interface.diff new file mode 100644 index 0000000000..dadda2cf23 --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-include-xen-interface.diff @@ -0,0 +1,5161 @@ +Subject: xen3 include-xen-interface +From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd) +Patch-mainline: obsolete +Acked-by: jbeulich@novell.com + +Index: head-2008-11-25/include/xen/interface/COPYING +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/COPYING 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,38 @@ ++XEN NOTICE ++========== ++ ++This copyright applies to 
all files within this subdirectory and its ++subdirectories: ++ include/public/*.h ++ include/public/hvm/*.h ++ include/public/io/*.h ++ ++The intention is that these files can be freely copied into the source ++tree of an operating system when porting that OS to run on Xen. Doing ++so does *not* cause the OS to become subject to the terms of the GPL. ++ ++All other files in the Xen source distribution are covered by version ++2 of the GNU General Public License except where explicitly stated ++otherwise within individual source files. ++ ++ -- Keir Fraser (on behalf of the Xen team) ++ ++===================================================================== ++ ++Permission is hereby granted, free of charge, to any person obtaining a copy ++of this software and associated documentation files (the "Software"), to ++deal in the Software without restriction, including without limitation the ++rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++sell copies of the Software, and to permit persons to whom the Software is ++furnished to do so, subject to the following conditions: ++ ++The above copyright notice and this permission notice shall be included in ++all copies or substantial portions of the Software. ++ ++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++DEALINGS IN THE SOFTWARE. 
+Index: head-2008-11-25/include/xen/interface/arch-x86/cpuid.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/arch-x86/cpuid.h 2008-01-21 11:15:27.000000000 +0100 +@@ -0,0 +1,68 @@ ++/****************************************************************************** ++ * arch-x86/cpuid.h ++ * ++ * CPUID interface to Xen. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2007 Citrix Systems, Inc. ++ * ++ * Authors: ++ * Keir Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__ ++#define __XEN_PUBLIC_ARCH_X86_CPUID_H__ ++ ++/* Xen identification leaves start at 0x40000000. */ ++#define XEN_CPUID_FIRST_LEAF 0x40000000 ++#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i)) ++ ++/* ++ * Leaf 1 (0x40000000) ++ * EAX: Largest Xen-information leaf. 
All leaves up to an including @EAX ++ * are supported by the Xen host. ++ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification ++ * of a Xen host. ++ */ ++#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */ ++#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */ ++#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */ ++ ++/* ++ * Leaf 2 (0x40000001) ++ * EAX[31:16]: Xen major version. ++ * EAX[15: 0]: Xen minor version. ++ * EBX-EDX: Reserved (currently all zeroes). ++ */ ++ ++/* ++ * Leaf 3 (0x40000002) ++ * EAX: Number of hypercall transfer pages. This register is always guaranteed ++ * to specify one hypercall page. ++ * EBX: Base address of Xen-specific MSRs. ++ * ECX: Features 1. Unused bits are set to zero. ++ * EDX: Features 2. Unused bits are set to zero. ++ */ ++ ++/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */ ++#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 ++#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) ++ ++#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ +Index: head-2008-11-25/include/xen/interface/arch-x86/hvm/save.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/arch-x86/hvm/save.h 2008-10-29 09:55:56.000000000 +0100 +@@ -0,0 +1,440 @@ ++/* ++ * Structure definitions for HVM state that is held by Xen and must ++ * be saved along with the domain's memory and device-model state. ++ * ++ * Copyright (c) 2007 XenSource Ltd. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __XEN_PUBLIC_HVM_SAVE_X86_H__ ++#define __XEN_PUBLIC_HVM_SAVE_X86_H__ ++ ++/* ++ * Save/restore header: general info about the save file. 
++ */ ++ ++#define HVM_FILE_MAGIC 0x54381286 ++#define HVM_FILE_VERSION 0x00000001 ++ ++struct hvm_save_header { ++ uint32_t magic; /* Must be HVM_FILE_MAGIC */ ++ uint32_t version; /* File format version */ ++ uint64_t changeset; /* Version of Xen that saved this file */ ++ uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */ ++ uint32_t pad0; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header); ++ ++ ++/* ++ * Processor ++ */ ++ ++struct hvm_hw_cpu { ++ uint8_t fpu_regs[512]; ++ ++ uint64_t rax; ++ uint64_t rbx; ++ uint64_t rcx; ++ uint64_t rdx; ++ uint64_t rbp; ++ uint64_t rsi; ++ uint64_t rdi; ++ uint64_t rsp; ++ uint64_t r8; ++ uint64_t r9; ++ uint64_t r10; ++ uint64_t r11; ++ uint64_t r12; ++ uint64_t r13; ++ uint64_t r14; ++ uint64_t r15; ++ ++ uint64_t rip; ++ uint64_t rflags; ++ ++ uint64_t cr0; ++ uint64_t cr2; ++ uint64_t cr3; ++ uint64_t cr4; ++ ++ uint64_t dr0; ++ uint64_t dr1; ++ uint64_t dr2; ++ uint64_t dr3; ++ uint64_t dr6; ++ uint64_t dr7; ++ ++ uint32_t cs_sel; ++ uint32_t ds_sel; ++ uint32_t es_sel; ++ uint32_t fs_sel; ++ uint32_t gs_sel; ++ uint32_t ss_sel; ++ uint32_t tr_sel; ++ uint32_t ldtr_sel; ++ ++ uint32_t cs_limit; ++ uint32_t ds_limit; ++ uint32_t es_limit; ++ uint32_t fs_limit; ++ uint32_t gs_limit; ++ uint32_t ss_limit; ++ uint32_t tr_limit; ++ uint32_t ldtr_limit; ++ uint32_t idtr_limit; ++ uint32_t gdtr_limit; ++ ++ uint64_t cs_base; ++ uint64_t ds_base; ++ uint64_t es_base; ++ uint64_t fs_base; ++ uint64_t gs_base; ++ uint64_t ss_base; ++ uint64_t tr_base; ++ uint64_t ldtr_base; ++ uint64_t idtr_base; ++ uint64_t gdtr_base; ++ ++ uint32_t cs_arbytes; ++ uint32_t ds_arbytes; ++ uint32_t es_arbytes; ++ uint32_t fs_arbytes; ++ uint32_t gs_arbytes; ++ uint32_t ss_arbytes; ++ uint32_t tr_arbytes; ++ uint32_t ldtr_arbytes; ++ ++ uint32_t sysenter_cs; ++ uint32_t padding0; ++ ++ uint64_t sysenter_esp; ++ uint64_t sysenter_eip; ++ ++ /* msr for em64t */ ++ uint64_t shadow_gs; ++ ++ /* msr content saved/restored. 
*/ ++ uint64_t msr_flags; ++ uint64_t msr_lstar; ++ uint64_t msr_star; ++ uint64_t msr_cstar; ++ uint64_t msr_syscall_mask; ++ uint64_t msr_efer; ++ ++ /* guest's idea of what rdtsc() would return */ ++ uint64_t tsc; ++ ++ /* pending event, if any */ ++ union { ++ uint32_t pending_event; ++ struct { ++ uint8_t pending_vector:8; ++ uint8_t pending_type:3; ++ uint8_t pending_error_valid:1; ++ uint32_t pending_reserved:19; ++ uint8_t pending_valid:1; ++ }; ++ }; ++ /* error code for pending event */ ++ uint32_t error_code; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu); ++ ++ ++/* ++ * PIC ++ */ ++ ++struct hvm_hw_vpic { ++ /* IR line bitmasks. */ ++ uint8_t irr; ++ uint8_t imr; ++ uint8_t isr; ++ ++ /* Line IRx maps to IRQ irq_base+x */ ++ uint8_t irq_base; ++ ++ /* ++ * Where are we in ICW2-4 initialisation (0 means no init in progress)? ++ * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1). ++ * Bit 2: ICW1.IC4 (1 == ICW4 included in init sequence) ++ * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence) ++ */ ++ uint8_t init_state:4; ++ ++ /* IR line with highest priority. */ ++ uint8_t priority_add:4; ++ ++ /* Reads from A=0 obtain ISR or IRR? */ ++ uint8_t readsel_isr:1; ++ ++ /* Reads perform a polling read? */ ++ uint8_t poll:1; ++ ++ /* Automatically clear IRQs from the ISR during INTA? */ ++ uint8_t auto_eoi:1; ++ ++ /* Automatically rotate IRQ priorities during AEOI? */ ++ uint8_t rotate_on_auto_eoi:1; ++ ++ /* Exclude slave inputs when considering in-service IRQs? */ ++ uint8_t special_fully_nested_mode:1; ++ ++ /* Special mask mode excludes masked IRs from AEOI and priority checks. */ ++ uint8_t special_mask_mode:1; ++ ++ /* Is this a master PIC or slave PIC? (NB. This is not programmable.) */ ++ uint8_t is_master:1; ++ ++ /* Edge/trigger selection. */ ++ uint8_t elcr; ++ ++ /* Virtual INT output. 
*/ ++ uint8_t int_output; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic); ++ ++ ++/* ++ * IO-APIC ++ */ ++ ++#ifdef __ia64__ ++#define VIOAPIC_IS_IOSAPIC 1 ++#define VIOAPIC_NUM_PINS 24 ++#else ++#define VIOAPIC_NUM_PINS 48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */ ++#endif ++ ++struct hvm_hw_vioapic { ++ uint64_t base_address; ++ uint32_t ioregsel; ++ uint32_t id; ++ union vioapic_redir_entry ++ { ++ uint64_t bits; ++ struct { ++ uint8_t vector; ++ uint8_t delivery_mode:3; ++ uint8_t dest_mode:1; ++ uint8_t delivery_status:1; ++ uint8_t polarity:1; ++ uint8_t remote_irr:1; ++ uint8_t trig_mode:1; ++ uint8_t mask:1; ++ uint8_t reserve:7; ++#if !VIOAPIC_IS_IOSAPIC ++ uint8_t reserved[4]; ++ uint8_t dest_id; ++#else ++ uint8_t reserved[3]; ++ uint16_t dest_id; ++#endif ++ } fields; ++ } redirtbl[VIOAPIC_NUM_PINS]; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic); ++ ++ ++/* ++ * LAPIC ++ */ ++ ++struct hvm_hw_lapic { ++ uint64_t apic_base_msr; ++ uint32_t disabled; /* VLAPIC_xx_DISABLED */ ++ uint32_t timer_divisor; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic); ++ ++struct hvm_hw_lapic_regs { ++ uint8_t data[1024]; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs); ++ ++ ++/* ++ * IRQs ++ */ ++ ++struct hvm_hw_pci_irqs { ++ /* ++ * Virtual interrupt wires for a single PCI bus. ++ * Indexed by: device*4 + INTx#. ++ */ ++ union { ++ DECLARE_BITMAP(i, 32*4); ++ uint64_t pad[2]; ++ }; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs); ++ ++struct hvm_hw_isa_irqs { ++ /* ++ * Virtual interrupt wires for ISA devices. ++ * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing). ++ */ ++ union { ++ DECLARE_BITMAP(i, 16); ++ uint64_t pad[1]; ++ }; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs); ++ ++struct hvm_hw_pci_link { ++ /* ++ * PCI-ISA interrupt router. 
++ * Each PCI is 'wire-ORed' into one of four links using ++ * the traditional 'barber's pole' mapping ((device + INTx#) & 3). ++ * The router provides a programmable mapping from each link to a GSI. ++ */ ++ uint8_t route[4]; ++ uint8_t pad0[4]; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link); ++ ++/* ++ * PIT ++ */ ++ ++struct hvm_hw_pit { ++ struct hvm_hw_pit_channel { ++ uint32_t count; /* can be 65536 */ ++ uint16_t latched_count; ++ uint8_t count_latched; ++ uint8_t status_latched; ++ uint8_t status; ++ uint8_t read_state; ++ uint8_t write_state; ++ uint8_t write_latch; ++ uint8_t rw_mode; ++ uint8_t mode; ++ uint8_t bcd; /* not supported */ ++ uint8_t gate; /* timer start */ ++ } channels[3]; /* 3 x 16 bytes */ ++ uint32_t speaker_data_on; ++ uint32_t pad0; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit); ++ ++ ++/* ++ * RTC ++ */ ++ ++#define RTC_CMOS_SIZE 14 ++struct hvm_hw_rtc { ++ /* CMOS bytes */ ++ uint8_t cmos_data[RTC_CMOS_SIZE]; ++ /* Index register for 2-part operations */ ++ uint8_t cmos_index; ++ uint8_t pad0; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc); ++ ++ ++/* ++ * HPET ++ */ ++ ++#define HPET_TIMER_NUM 3 /* 3 timers supported now */ ++struct hvm_hw_hpet { ++ /* Memory-mapped, software visible registers */ ++ uint64_t capability; /* capabilities */ ++ uint64_t res0; /* reserved */ ++ uint64_t config; /* configuration */ ++ uint64_t res1; /* reserved */ ++ uint64_t isr; /* interrupt status reg */ ++ uint64_t res2[25]; /* reserved */ ++ uint64_t mc64; /* main counter */ ++ uint64_t res3; /* reserved */ ++ struct { /* timers */ ++ uint64_t config; /* configuration/cap */ ++ uint64_t cmp; /* comparator */ ++ uint64_t fsb; /* FSB route, not supported now */ ++ uint64_t res4; /* reserved */ ++ } timers[HPET_TIMER_NUM]; ++ uint64_t res5[4*(24-HPET_TIMER_NUM)]; /* reserved, up to 0x3ff */ ++ ++ /* Hidden register state */ ++ uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */ ++}; 
++ ++DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet); ++ ++ ++/* ++ * PM timer ++ */ ++ ++struct hvm_hw_pmtimer { ++ uint32_t tmr_val; /* PM_TMR_BLK.TMR_VAL: 32bit free-running counter */ ++ uint16_t pm1a_sts; /* PM1a_EVT_BLK.PM1a_STS: status register */ ++ uint16_t pm1a_en; /* PM1a_EVT_BLK.PM1a_EN: enable register */ ++}; ++ ++DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer); ++ ++/* ++ * MTRR MSRs ++ */ ++ ++struct hvm_hw_mtrr { ++#define MTRR_VCNT 8 ++#define NUM_FIXED_MSR 11 ++ uint64_t msr_pat_cr; ++ /* mtrr physbase & physmask msr pair*/ ++ uint64_t msr_mtrr_var[MTRR_VCNT*2]; ++ uint64_t msr_mtrr_fixed[NUM_FIXED_MSR]; ++ uint64_t msr_mtrr_cap; ++ uint64_t msr_mtrr_def_type; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr); ++ ++/* ++ * Viridian hypervisor context. ++ */ ++ ++struct hvm_viridian_context { ++ uint64_t hypercall_gpa; ++ uint64_t guest_os_id; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(VIRIDIAN, 15, struct hvm_viridian_context); ++ ++/* ++ * Largest type-code in use ++ */ ++#define HVM_SAVE_CODE_MAX 15 ++ ++#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ +Index: head-2008-11-25/include/xen/interface/arch-x86/xen-mca.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/arch-x86/xen-mca.h 2008-09-01 12:07:31.000000000 +0200 +@@ -0,0 +1,279 @@ ++/****************************************************************************** ++ * arch-x86/mca.h ++ * ++ * Contributed by Advanced Micro Devices, Inc. ++ * Author: Christoph Egger ++ * ++ * Guest OS machine check interface to x86 Xen. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++/* Full MCA functionality has the following Usecases from the guest side: ++ * ++ * Must have's: ++ * 1. Dom0 and DomU register machine check trap callback handlers ++ * (already done via "set_trap_table" hypercall) ++ * 2. Dom0 registers machine check event callback handler ++ * (doable via EVTCHNOP_bind_virq) ++ * 3. Dom0 and DomU fetches machine check data ++ * 4. Dom0 wants Xen to notify a DomU ++ * 5. Dom0 gets DomU ID from physical address ++ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy") ++ * ++ * Nice to have's: ++ * 7. Dom0 wants Xen to deactivate a physical CPU ++ * This is better done as separate task, physical CPU hotplugging, ++ * and hypercall(s) should be sysctl's ++ * 8. 
Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to ++ * move a DomU (or Dom0 itself) away from a malicious page ++ * producing correctable errors. ++ * 9. offlining physical page: ++ * Xen free's and never re-uses a certain physical page. ++ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's ++ * and tell Xen to trigger a machine check ++ */ ++ ++#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__ ++#define __XEN_PUBLIC_ARCH_X86_MCA_H__ ++ ++/* Hypercall */ ++#define __HYPERVISOR_mca __HYPERVISOR_arch_0 ++ ++#define XEN_MCA_INTERFACE_VERSION 0x03000001 ++ ++/* IN: Dom0 calls hypercall from MC event handler. */ ++#define XEN_MC_CORRECTABLE 0x0 ++/* IN: Dom0/DomU calls hypercall from MC trap handler. */ ++#define XEN_MC_TRAP 0x1 ++/* XEN_MC_CORRECTABLE and XEN_MC_TRAP are mutually exclusive. */ ++ ++/* OUT: All is ok */ ++#define XEN_MC_OK 0x0 ++/* OUT: Domain could not fetch data. */ ++#define XEN_MC_FETCHFAILED 0x1 ++/* OUT: There was no machine check data to fetch. */ ++#define XEN_MC_NODATA 0x2 ++/* OUT: Between notification time and this hypercall an other ++ * (most likely) correctable error happened. The fetched data, ++ * does not match the original machine check data. */ ++#define XEN_MC_NOMATCH 0x4 ++ ++/* OUT: DomU did not register MC NMI handler. Try something else. */ ++#define XEN_MC_CANNOTHANDLE 0x8 ++/* OUT: Notifying DomU failed. Retry later or try something else. */ ++#define XEN_MC_NOTDELIVERED 0x10 ++/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */ ++ ++ ++#ifndef __ASSEMBLY__ ++ ++#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */ ++ ++/* ++ * Machine Check Architecure: ++ * structs are read-only and used to report all kinds of ++ * correctable and uncorrectable errors detected by the HW. ++ * Dom0 and DomU: register a handler to get notified. 
++ * Dom0 only: Correctable errors are reported via VIRQ_MCA ++ * Dom0 and DomU: Uncorrectable errors are reported via nmi handlers ++ */ ++#define MC_TYPE_GLOBAL 0 ++#define MC_TYPE_BANK 1 ++#define MC_TYPE_EXTENDED 2 ++ ++struct mcinfo_common { ++ uint16_t type; /* structure type */ ++ uint16_t size; /* size of this struct in bytes */ ++}; ++ ++ ++#define MC_FLAG_CORRECTABLE (1 << 0) ++#define MC_FLAG_UNCORRECTABLE (1 << 1) ++ ++/* contains global x86 mc information */ ++struct mcinfo_global { ++ struct mcinfo_common common; ++ ++ /* running domain at the time in error (most likely the impacted one) */ ++ uint16_t mc_domid; ++ uint32_t mc_socketid; /* physical socket of the physical core */ ++ uint16_t mc_coreid; /* physical impacted core */ ++ uint16_t mc_core_threadid; /* core thread of physical core */ ++ uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ ++ uint64_t mc_gstatus; /* global status */ ++ uint32_t mc_flags; ++}; ++ ++/* contains bank local x86 mc information */ ++struct mcinfo_bank { ++ struct mcinfo_common common; ++ ++ uint16_t mc_bank; /* bank nr */ ++ uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0 ++ * and if mc_addr is valid. Never valid on DomU. */ ++ uint64_t mc_status; /* bank status */ ++ uint64_t mc_addr; /* bank address, only valid ++ * if addr bit is set in mc_status */ ++ uint64_t mc_misc; ++}; ++ ++ ++struct mcinfo_msr { ++ uint64_t reg; /* MSR */ ++ uint64_t value; /* MSR value */ ++}; ++ ++/* contains mc information from other ++ * or additional mc MSRs */ ++struct mcinfo_extended { ++ struct mcinfo_common common; ++ ++ /* You can fill up to five registers. ++ * If you need more, then use this structure ++ * multiple times. */ ++ ++ uint32_t mc_msrs; /* Number of msr with valid values. 
*/ ++ struct mcinfo_msr mc_msr[5]; ++}; ++ ++#define MCINFO_HYPERCALLSIZE 1024 ++#define MCINFO_MAXSIZE 768 ++ ++struct mc_info { ++ /* Number of mcinfo_* entries in mi_data */ ++ uint32_t mi_nentries; ++ ++ uint8_t mi_data[MCINFO_MAXSIZE - sizeof(uint32_t)]; ++}; ++typedef struct mc_info mc_info_t; ++ ++ ++ ++/* ++ * OS's should use these instead of writing their own lookup function ++ * each with its own bugs and drawbacks. ++ * We use macros instead of static inline functions to allow guests ++ * to include this header in assembly files (*.S). ++ */ ++/* Prototype: ++ * uint32_t x86_mcinfo_nentries(struct mc_info *mi); ++ */ ++#define x86_mcinfo_nentries(_mi) \ ++ (_mi)->mi_nentries ++/* Prototype: ++ * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); ++ */ ++#define x86_mcinfo_first(_mi) \ ++ (struct mcinfo_common *)((_mi)->mi_data) ++/* Prototype: ++ * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); ++ */ ++#define x86_mcinfo_next(_mic) \ ++ (struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size) ++ ++/* Prototype: ++ * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); ++ */ ++#define x86_mcinfo_lookup(_ret, _mi, _type) \ ++ do { \ ++ uint32_t found, i; \ ++ struct mcinfo_common *_mic; \ ++ \ ++ found = 0; \ ++ (_ret) = NULL; \ ++ if (_mi == NULL) break; \ ++ _mic = x86_mcinfo_first(_mi); \ ++ for (i = 0; i < x86_mcinfo_nentries(_mi); i++) { \ ++ if (_mic->type == (_type)) { \ ++ found = 1; \ ++ break; \ ++ } \ ++ _mic = x86_mcinfo_next(_mic); \ ++ } \ ++ (_ret) = found ? _mic : NULL; \ ++ } while (0) ++ ++ ++/* Usecase 1 ++ * Register machine check trap callback handler ++ * (already done via "set_trap_table" hypercall) ++ */ ++ ++/* Usecase 2 ++ * Dom0 registers machine check event callback handler ++ * done by EVTCHNOP_bind_virq ++ */ ++ ++/* Usecase 3 ++ * Fetch machine check data from hypervisor. ++ * Note, this hypercall is special, because both Dom0 and DomU must use this. 
++ */ ++#define XEN_MC_fetch 1 ++struct xen_mc_fetch { ++ /* IN/OUT variables. */ ++ uint32_t flags; ++ ++/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */ ++/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, XEN_MC_NODATA, XEN_MC_NOMATCH */ ++ ++ /* OUT variables. */ ++ uint32_t fetch_idx; /* only useful for Dom0 for the notify hypercall */ ++ struct mc_info mc_info; ++}; ++typedef struct xen_mc_fetch xen_mc_fetch_t; ++DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t); ++ ++ ++/* Usecase 4 ++ * This tells the hypervisor to notify a DomU about the machine check error ++ */ ++#define XEN_MC_notifydomain 2 ++struct xen_mc_notifydomain { ++ /* IN variables. */ ++ uint16_t mc_domid; /* The unprivileged domain to notify. */ ++ uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify. ++ * Usually echo'd value from the fetch hypercall. */ ++ uint32_t fetch_idx; /* echo'd value from the fetch hypercall. */ ++ ++ /* IN/OUT variables. */ ++ uint32_t flags; ++ ++/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */ ++/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */ ++}; ++typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; ++DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); ++ ++ ++struct xen_mc { ++ uint32_t cmd; ++ uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ ++ union { ++ struct xen_mc_fetch mc_fetch; ++ struct xen_mc_notifydomain mc_notifydomain; ++ uint8_t pad[MCINFO_HYPERCALLSIZE]; ++ } u; ++}; ++typedef struct xen_mc xen_mc_t; ++DEFINE_XEN_GUEST_HANDLE(xen_mc_t); ++ ++#endif /* __ASSEMBLY__ */ ++ ++#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */ +Index: head-2008-11-25/include/xen/interface/arch-x86/xen-x86_32.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/arch-x86/xen-x86_32.h 2008-07-21 11:00:33.000000000 +0200 +@@ -0,0 +1,180 @@ ++/****************************************************************************** ++ * xen-x86_32.h ++ * ++ * Guest OS 
interface to x86 32-bit Xen. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2004-2007, K A Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ ++#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ ++ ++/* ++ * Hypercall interface: ++ * Input: %ebx, %ecx, %edx, %esi, %edi (arguments 1-5) ++ * Output: %eax ++ * Access is via hypercall page (set up by guest loader or via a Xen MSR): ++ * call hypercall_page + hypercall-number * 32 ++ * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) ++ */ ++ ++#if __XEN_INTERFACE_VERSION__ < 0x00030203 ++/* ++ * Legacy hypercall interface: ++ * As above, except the entry sequence to the hypervisor is: ++ * mov $hypercall-number*32,%eax ; int $0x82 ++ */ ++#define TRAP_INSTR "int $0x82" ++#endif ++ ++/* ++ * These flat segments are in the Xen-private section of every GDT. 
Since these ++ * are also present in the initial GDT, many OSes will be able to avoid ++ * installing their own GDT. ++ */ ++#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ ++#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ ++#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ ++#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ ++#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ ++#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ ++ ++#define FLAT_KERNEL_CS FLAT_RING1_CS ++#define FLAT_KERNEL_DS FLAT_RING1_DS ++#define FLAT_KERNEL_SS FLAT_RING1_SS ++#define FLAT_USER_CS FLAT_RING3_CS ++#define FLAT_USER_DS FLAT_RING3_DS ++#define FLAT_USER_SS FLAT_RING3_SS ++ ++#define __HYPERVISOR_VIRT_START_PAE 0xF5800000 ++#define __MACH2PHYS_VIRT_START_PAE 0xF5800000 ++#define __MACH2PHYS_VIRT_END_PAE 0xF6800000 ++#define HYPERVISOR_VIRT_START_PAE \ ++ mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE) ++#define MACH2PHYS_VIRT_START_PAE \ ++ mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE) ++#define MACH2PHYS_VIRT_END_PAE \ ++ mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE) ++ ++/* Non-PAE bounds are obsolete. 
*/ ++#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000 ++#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000 ++#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000 ++#define HYPERVISOR_VIRT_START_NONPAE \ ++ mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE) ++#define MACH2PHYS_VIRT_START_NONPAE \ ++ mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE) ++#define MACH2PHYS_VIRT_END_NONPAE \ ++ mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE) ++ ++#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE ++#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE ++#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE ++ ++#ifndef HYPERVISOR_VIRT_START ++#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) ++#endif ++ ++#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) ++#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) ++#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) ++#ifndef machine_to_phys_mapping ++#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) ++#endif ++ ++/* 32-/64-bit invariability for control interfaces (domctl/sysctl). 
*/ ++#if defined(__XEN__) || defined(__XEN_TOOLS__) ++#undef ___DEFINE_XEN_GUEST_HANDLE ++#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ ++ typedef struct { type *p; } \ ++ __guest_handle_ ## name; \ ++ typedef struct { union { type *p; uint64_aligned_t q; }; } \ ++ __guest_handle_64_ ## name ++#undef set_xen_guest_handle ++#define set_xen_guest_handle(hnd, val) \ ++ do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ ++ (hnd).p = val; \ ++ } while ( 0 ) ++#define uint64_aligned_t uint64_t __attribute__((aligned(8))) ++#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name ++#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name) ++#endif ++ ++#ifndef __ASSEMBLY__ ++ ++struct cpu_user_regs { ++ uint32_t ebx; ++ uint32_t ecx; ++ uint32_t edx; ++ uint32_t esi; ++ uint32_t edi; ++ uint32_t ebp; ++ uint32_t eax; ++ uint16_t error_code; /* private */ ++ uint16_t entry_vector; /* private */ ++ uint32_t eip; ++ uint16_t cs; ++ uint8_t saved_upcall_mask; ++ uint8_t _pad0; ++ uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ ++ uint32_t esp; ++ uint16_t ss, _pad1; ++ uint16_t es, _pad2; ++ uint16_t ds, _pad3; ++ uint16_t fs, _pad4; ++ uint16_t gs, _pad5; ++}; ++typedef struct cpu_user_regs cpu_user_regs_t; ++DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); ++ ++/* ++ * Page-directory addresses above 4GB do not fit into architectural %cr3. ++ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests ++ * must use the following accessor macros to pack/unpack valid MFNs. 
++ */ ++#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) ++#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) ++ ++struct arch_vcpu_info { ++ unsigned long cr2; ++ unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */ ++}; ++typedef struct arch_vcpu_info arch_vcpu_info_t; ++ ++struct xen_callback { ++ unsigned long cs; ++ unsigned long eip; ++}; ++typedef struct xen_callback xen_callback_t; ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/arch-x86/xen-x86_64.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/arch-x86/xen-x86_64.h 2008-04-02 12:34:02.000000000 +0200 +@@ -0,0 +1,212 @@ ++/****************************************************************************** ++ * xen-x86_64.h ++ * ++ * Guest OS interface to x86 64-bit Xen. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2004-2006, K A Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ ++#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ ++ ++/* ++ * Hypercall interface: ++ * Input: %rdi, %rsi, %rdx, %r10, %r8 (arguments 1-5) ++ * Output: %rax ++ * Access is via hypercall page (set up by guest loader or via a Xen MSR): ++ * call hypercall_page + hypercall-number * 32 ++ * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) ++ */ ++ ++#if __XEN_INTERFACE_VERSION__ < 0x00030203 ++/* ++ * Legacy hypercall interface: ++ * As above, except the entry sequence to the hypervisor is: ++ * mov $hypercall-number*32,%eax ; syscall ++ * Clobbered: %rcx, %r11, argument registers (as above) ++ */ ++#define TRAP_INSTR "syscall" ++#endif ++ ++/* ++ * 64-bit segment selectors ++ * These flat segments are in the Xen-private section of every GDT. Since these ++ * are also present in the initial GDT, many OSes will be able to avoid ++ * installing their own GDT. 
++ */ ++ ++#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ ++#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ ++#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ ++#define FLAT_RING3_DS64 0x0000 /* NULL selector */ ++#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ ++#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ ++ ++#define FLAT_KERNEL_DS64 FLAT_RING3_DS64 ++#define FLAT_KERNEL_DS32 FLAT_RING3_DS32 ++#define FLAT_KERNEL_DS FLAT_KERNEL_DS64 ++#define FLAT_KERNEL_CS64 FLAT_RING3_CS64 ++#define FLAT_KERNEL_CS32 FLAT_RING3_CS32 ++#define FLAT_KERNEL_CS FLAT_KERNEL_CS64 ++#define FLAT_KERNEL_SS64 FLAT_RING3_SS64 ++#define FLAT_KERNEL_SS32 FLAT_RING3_SS32 ++#define FLAT_KERNEL_SS FLAT_KERNEL_SS64 ++ ++#define FLAT_USER_DS64 FLAT_RING3_DS64 ++#define FLAT_USER_DS32 FLAT_RING3_DS32 ++#define FLAT_USER_DS FLAT_USER_DS64 ++#define FLAT_USER_CS64 FLAT_RING3_CS64 ++#define FLAT_USER_CS32 FLAT_RING3_CS32 ++#define FLAT_USER_CS FLAT_USER_CS64 ++#define FLAT_USER_SS64 FLAT_RING3_SS64 ++#define FLAT_USER_SS32 FLAT_RING3_SS32 ++#define FLAT_USER_SS FLAT_USER_SS64 ++ ++#define __HYPERVISOR_VIRT_START 0xFFFF800000000000 ++#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 ++#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 ++#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 ++ ++#ifndef HYPERVISOR_VIRT_START ++#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) ++#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) ++#endif ++ ++#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) ++#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) ++#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) ++#ifndef machine_to_phys_mapping ++#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) ++#endif ++ ++/* ++ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) ++ * @which == SEGBASE_* ; @base == 64-bit base address ++ * Returns 0 on success. 
++ */ ++#define SEGBASE_FS 0 ++#define SEGBASE_GS_USER 1 ++#define SEGBASE_GS_KERNEL 2 ++#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ ++ ++/* ++ * int HYPERVISOR_iret(void) ++ * All arguments are on the kernel stack, in the following format. ++ * Never returns if successful. Current kernel context is lost. ++ * The saved CS is mapped as follows: ++ * RING0 -> RING3 kernel mode. ++ * RING1 -> RING3 kernel mode. ++ * RING2 -> RING3 kernel mode. ++ * RING3 -> RING3 user mode. ++ * However RING0 indicates that the guest kernel should return to iteself ++ * directly with ++ * orb $3,1*8(%rsp) ++ * iretq ++ * If flags contains VGCF_in_syscall: ++ * Restore RAX, RIP, RFLAGS, RSP. ++ * Discard R11, RCX, CS, SS. ++ * Otherwise: ++ * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. ++ * All other registers are saved on hypercall entry and restored to user. ++ */ ++/* Guest exited in SYSCALL context? Return to guest with SYSRET? */ ++#define _VGCF_in_syscall 8 ++#define VGCF_in_syscall (1<<_VGCF_in_syscall) ++#define VGCF_IN_SYSCALL VGCF_in_syscall ++ ++#ifndef __ASSEMBLY__ ++ ++struct iret_context { ++ /* Top of stack (%rsp at point of hypercall). */ ++ uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; ++ /* Bottom of iret stack frame. */ ++}; ++ ++#if defined(__GNUC__) && !defined(__STRICT_ANSI__) ++/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ ++#define __DECL_REG(name) union { \ ++ uint64_t r ## name, e ## name; \ ++ uint32_t _e ## name; \ ++} ++#else ++/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). 
*/ ++#define __DECL_REG(name) uint64_t r ## name ++#endif ++ ++struct cpu_user_regs { ++ uint64_t r15; ++ uint64_t r14; ++ uint64_t r13; ++ uint64_t r12; ++ __DECL_REG(bp); ++ __DECL_REG(bx); ++ uint64_t r11; ++ uint64_t r10; ++ uint64_t r9; ++ uint64_t r8; ++ __DECL_REG(ax); ++ __DECL_REG(cx); ++ __DECL_REG(dx); ++ __DECL_REG(si); ++ __DECL_REG(di); ++ uint32_t error_code; /* private */ ++ uint32_t entry_vector; /* private */ ++ __DECL_REG(ip); ++ uint16_t cs, _pad0[1]; ++ uint8_t saved_upcall_mask; ++ uint8_t _pad1[3]; ++ __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ ++ __DECL_REG(sp); ++ uint16_t ss, _pad2[3]; ++ uint16_t es, _pad3[3]; ++ uint16_t ds, _pad4[3]; ++ uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ ++ uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ ++}; ++typedef struct cpu_user_regs cpu_user_regs_t; ++DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); ++ ++#undef __DECL_REG ++ ++#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) ++#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) ++ ++struct arch_vcpu_info { ++ unsigned long cr2; ++ unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ ++}; ++typedef struct arch_vcpu_info arch_vcpu_info_t; ++ ++typedef unsigned long xen_callback_t; ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/arch-x86/xen.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/arch-x86/xen.h 2008-09-01 12:07:31.000000000 +0200 +@@ -0,0 +1,204 @@ ++/****************************************************************************** ++ * arch-x86/xen.h ++ * ++ * Guest OS interface to x86 Xen. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2004-2006, K A Fraser ++ */ ++ ++#include "../xen.h" ++ ++#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__ ++#define __XEN_PUBLIC_ARCH_X86_XEN_H__ ++ ++/* Structural guest handles introduced in 0x00030201. 
*/ ++#if __XEN_INTERFACE_VERSION__ >= 0x00030201 ++#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ ++ typedef struct { type *p; } __guest_handle_ ## name ++#else ++#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ ++ typedef type * __guest_handle_ ## name ++#endif ++ ++#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ ++ ___DEFINE_XEN_GUEST_HANDLE(name, type); \ ++ ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) ++#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) ++#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name ++#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) ++#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) ++#ifdef __XEN_TOOLS__ ++#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) ++#endif ++ ++#if defined(__i386__) ++#include "xen-x86_32.h" ++#elif defined(__x86_64__) ++#include "xen-x86_64.h" ++#endif ++ ++#ifndef __ASSEMBLY__ ++typedef unsigned long xen_pfn_t; ++#define PRI_xen_pfn "lx" ++#endif ++ ++/* ++ * SEGMENT DESCRIPTOR TABLES ++ */ ++/* ++ * A number of GDT entries are reserved by Xen. These are not situated at the ++ * start of the GDT because some stupid OSes export hard-coded selector values ++ * in their ABI. These hard-coded values are always near the start of the GDT, ++ * so Xen places itself out of the way, at the far end of the GDT. ++ */ ++#define FIRST_RESERVED_GDT_PAGE 14 ++#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) ++#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) ++ ++/* Maximum number of virtual CPUs in multi-processor guests. */ ++#define MAX_VIRT_CPUS 32 ++ ++ ++/* Machine check support */ ++#include "xen-mca.h" ++ ++#ifndef __ASSEMBLY__ ++ ++typedef unsigned long xen_ulong_t; ++ ++/* ++ * Send an array of these to HYPERVISOR_set_trap_table(). ++ * The privilege level specifies which modes may enter a trap via a software ++ * interrupt. 
On x86/64, since rings 1 and 2 are unavailable, we allocate ++ * privilege levels as follows: ++ * Level == 0: Noone may enter ++ * Level == 1: Kernel may enter ++ * Level == 2: Kernel may enter ++ * Level == 3: Everyone may enter ++ */ ++#define TI_GET_DPL(_ti) ((_ti)->flags & 3) ++#define TI_GET_IF(_ti) ((_ti)->flags & 4) ++#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) ++#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) ++struct trap_info { ++ uint8_t vector; /* exception vector */ ++ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ ++ uint16_t cs; /* code selector */ ++ unsigned long address; /* code offset */ ++}; ++typedef struct trap_info trap_info_t; ++DEFINE_XEN_GUEST_HANDLE(trap_info_t); ++ ++typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ ++ ++/* ++ * The following is all CPU context. Note that the fpu_ctxt block is filled ++ * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. ++ */ ++struct vcpu_guest_context { ++ /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. 
*/ ++ struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ ++#define VGCF_I387_VALID (1<<0) ++#define VGCF_IN_KERNEL (1<<2) ++#define _VGCF_i387_valid 0 ++#define VGCF_i387_valid (1<<_VGCF_i387_valid) ++#define _VGCF_in_kernel 2 ++#define VGCF_in_kernel (1<<_VGCF_in_kernel) ++#define _VGCF_failsafe_disables_events 3 ++#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) ++#define _VGCF_syscall_disables_events 4 ++#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) ++#define _VGCF_online 5 ++#define VGCF_online (1<<_VGCF_online) ++ unsigned long flags; /* VGCF_* flags */ ++ struct cpu_user_regs user_regs; /* User-level CPU registers */ ++ struct trap_info trap_ctxt[256]; /* Virtual IDT */ ++ unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ ++ unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ ++ unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ ++ /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ ++ unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ ++ unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ ++#ifdef __i386__ ++ unsigned long event_callback_cs; /* CS:EIP of event callback */ ++ unsigned long event_callback_eip; ++ unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ ++ unsigned long failsafe_callback_eip; ++#else ++ unsigned long event_callback_eip; ++ unsigned long failsafe_callback_eip; ++#ifdef __XEN__ ++ union { ++ unsigned long syscall_callback_eip; ++ struct { ++ unsigned int event_callback_cs; /* compat CS of event cb */ ++ unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */ ++ }; ++ }; ++#else ++ unsigned long syscall_callback_eip; ++#endif ++#endif ++ unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ ++#ifdef __x86_64__ ++ /* Segment base addresses. 
*/ ++ uint64_t fs_base; ++ uint64_t gs_base_kernel; ++ uint64_t gs_base_user; ++#endif ++}; ++typedef struct vcpu_guest_context vcpu_guest_context_t; ++DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); ++ ++struct arch_shared_info { ++ unsigned long max_pfn; /* max pfn that appears in table */ ++ /* Frame containing list of mfns containing list of mfns containing p2m. */ ++ xen_pfn_t pfn_to_mfn_frame_list_list; ++ unsigned long nmi_reason; ++ uint64_t pad[32]; ++}; ++typedef struct arch_shared_info arch_shared_info_t; ++ ++#endif /* !__ASSEMBLY__ */ ++ ++/* ++ * Prefix forces emulation of some non-trapping instructions. ++ * Currently only CPUID. ++ */ ++#ifdef __ASSEMBLY__ ++#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; ++#define XEN_CPUID XEN_EMULATE_PREFIX cpuid ++#else ++#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " ++#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" ++#endif ++ ++#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/arch-x86_32.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/arch-x86_32.h 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,27 @@ ++/****************************************************************************** ++ * arch-x86_32.h ++ * ++ * Guest OS interface to x86 32-bit Xen. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2004-2006, K A Fraser ++ */ ++ ++#include "arch-x86/xen.h" +Index: head-2008-11-25/include/xen/interface/arch-x86_64.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/arch-x86_64.h 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,27 @@ ++/****************************************************************************** ++ * arch-x86_64.h ++ * ++ * Guest OS interface to x86 64-bit Xen. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2004-2006, K A Fraser ++ */ ++ ++#include "arch-x86/xen.h" +Index: head-2008-11-25/include/xen/interface/dom0_ops.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/dom0_ops.h 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,120 @@ ++/****************************************************************************** ++ * dom0_ops.h ++ * ++ * Process command requests from domain-0 guest OS. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. 
++ * ++ * Copyright (c) 2002-2003, B Dragovic ++ * Copyright (c) 2002-2006, K Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_DOM0_OPS_H__ ++#define __XEN_PUBLIC_DOM0_OPS_H__ ++ ++#include "xen.h" ++#include "platform.h" ++ ++#if __XEN_INTERFACE_VERSION__ >= 0x00030204 ++#error "dom0_ops.h is a compatibility interface only" ++#endif ++ ++#define DOM0_INTERFACE_VERSION XENPF_INTERFACE_VERSION ++ ++#define DOM0_SETTIME XENPF_settime ++#define dom0_settime xenpf_settime ++#define dom0_settime_t xenpf_settime_t ++ ++#define DOM0_ADD_MEMTYPE XENPF_add_memtype ++#define dom0_add_memtype xenpf_add_memtype ++#define dom0_add_memtype_t xenpf_add_memtype_t ++ ++#define DOM0_DEL_MEMTYPE XENPF_del_memtype ++#define dom0_del_memtype xenpf_del_memtype ++#define dom0_del_memtype_t xenpf_del_memtype_t ++ ++#define DOM0_READ_MEMTYPE XENPF_read_memtype ++#define dom0_read_memtype xenpf_read_memtype ++#define dom0_read_memtype_t xenpf_read_memtype_t ++ ++#define DOM0_MICROCODE XENPF_microcode_update ++#define dom0_microcode xenpf_microcode_update ++#define dom0_microcode_t xenpf_microcode_update_t ++ ++#define DOM0_PLATFORM_QUIRK XENPF_platform_quirk ++#define dom0_platform_quirk xenpf_platform_quirk ++#define dom0_platform_quirk_t xenpf_platform_quirk_t ++ ++typedef uint64_t cpumap_t; ++ ++/* Unsupported legacy operation -- defined for API compatibility. */ ++#define DOM0_MSR 15 ++struct dom0_msr { ++ /* IN variables. */ ++ uint32_t write; ++ cpumap_t cpu_mask; ++ uint32_t msr; ++ uint32_t in1; ++ uint32_t in2; ++ /* OUT variables. */ ++ uint32_t out1; ++ uint32_t out2; ++}; ++typedef struct dom0_msr dom0_msr_t; ++DEFINE_XEN_GUEST_HANDLE(dom0_msr_t); ++ ++/* Unsupported legacy operation -- defined for API compatibility. 
*/ ++#define DOM0_PHYSICAL_MEMORY_MAP 40 ++struct dom0_memory_map_entry { ++ uint64_t start, end; ++ uint32_t flags; /* reserved */ ++ uint8_t is_ram; ++}; ++typedef struct dom0_memory_map_entry dom0_memory_map_entry_t; ++DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t); ++ ++struct dom0_op { ++ uint32_t cmd; ++ uint32_t interface_version; /* DOM0_INTERFACE_VERSION */ ++ union { ++ struct dom0_msr msr; ++ struct dom0_settime settime; ++ struct dom0_add_memtype add_memtype; ++ struct dom0_del_memtype del_memtype; ++ struct dom0_read_memtype read_memtype; ++ struct dom0_microcode microcode; ++ struct dom0_platform_quirk platform_quirk; ++ struct dom0_memory_map_entry physical_memory_map; ++ uint8_t pad[128]; ++ } u; ++}; ++typedef struct dom0_op dom0_op_t; ++DEFINE_XEN_GUEST_HANDLE(dom0_op_t); ++ ++#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/domctl.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/domctl.h 2008-10-29 09:55:56.000000000 +0100 +@@ -0,0 +1,680 @@ ++/****************************************************************************** ++ * domctl.h ++ * ++ * Domain management operations. For use by node control stack. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2002-2003, B Dragovic ++ * Copyright (c) 2002-2006, K Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_DOMCTL_H__ ++#define __XEN_PUBLIC_DOMCTL_H__ ++ ++#if !defined(__XEN__) && !defined(__XEN_TOOLS__) ++#error "domctl operations are intended for use by node control tools only" ++#endif ++ ++#include "xen.h" ++ ++#define XEN_DOMCTL_INTERFACE_VERSION 0x00000005 ++ ++struct xenctl_cpumap { ++ XEN_GUEST_HANDLE_64(uint8) bitmap; ++ uint32_t nr_cpus; ++}; ++ ++/* ++ * NB. xen_domctl.domain is an IN/OUT parameter for this operation. ++ * If it is specified as zero, an id is auto-allocated and returned. ++ */ ++#define XEN_DOMCTL_createdomain 1 ++struct xen_domctl_createdomain { ++ /* IN parameters */ ++ uint32_t ssidref; ++ xen_domain_handle_t handle; ++ /* Is this an HVM guest (as opposed to a PV guest)? 
*/ ++#define _XEN_DOMCTL_CDF_hvm_guest 0 ++#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest) ++ /* Use hardware-assisted paging if available? */ ++#define _XEN_DOMCTL_CDF_hap 1 ++#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap) ++ uint32_t flags; ++}; ++typedef struct xen_domctl_createdomain xen_domctl_createdomain_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t); ++ ++#define XEN_DOMCTL_destroydomain 2 ++#define XEN_DOMCTL_pausedomain 3 ++#define XEN_DOMCTL_unpausedomain 4 ++#define XEN_DOMCTL_resumedomain 27 ++ ++#define XEN_DOMCTL_getdomaininfo 5 ++struct xen_domctl_getdomaininfo { ++ /* OUT variables. */ ++ domid_t domain; /* Also echoed in domctl.domain */ ++ /* Domain is scheduled to die. */ ++#define _XEN_DOMINF_dying 0 ++#define XEN_DOMINF_dying (1U<<_XEN_DOMINF_dying) ++ /* Domain is an HVM guest (as opposed to a PV guest). */ ++#define _XEN_DOMINF_hvm_guest 1 ++#define XEN_DOMINF_hvm_guest (1U<<_XEN_DOMINF_hvm_guest) ++ /* The guest OS has shut down. */ ++#define _XEN_DOMINF_shutdown 2 ++#define XEN_DOMINF_shutdown (1U<<_XEN_DOMINF_shutdown) ++ /* Currently paused by control software. */ ++#define _XEN_DOMINF_paused 3 ++#define XEN_DOMINF_paused (1U<<_XEN_DOMINF_paused) ++ /* Currently blocked pending an event. */ ++#define _XEN_DOMINF_blocked 4 ++#define XEN_DOMINF_blocked (1U<<_XEN_DOMINF_blocked) ++ /* Domain is currently running. */ ++#define _XEN_DOMINF_running 5 ++#define XEN_DOMINF_running (1U<<_XEN_DOMINF_running) ++ /* Being debugged. */ ++#define _XEN_DOMINF_debugged 6 ++#define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged) ++ /* CPU to which this domain is bound. */ ++#define XEN_DOMINF_cpumask 255 ++#define XEN_DOMINF_cpushift 8 ++ /* XEN_DOMINF_shutdown guest-supplied code. 
*/ ++#define XEN_DOMINF_shutdownmask 255 ++#define XEN_DOMINF_shutdownshift 16 ++ uint32_t flags; /* XEN_DOMINF_* */ ++ uint64_aligned_t tot_pages; ++ uint64_aligned_t max_pages; ++ uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */ ++ uint64_aligned_t cpu_time; ++ uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */ ++ uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */ ++ uint32_t ssidref; ++ xen_domain_handle_t handle; ++}; ++typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t); ++ ++ ++#define XEN_DOMCTL_getmemlist 6 ++struct xen_domctl_getmemlist { ++ /* IN variables. */ ++ /* Max entries to write to output buffer. */ ++ uint64_aligned_t max_pfns; ++ /* Start index in guest's page list. */ ++ uint64_aligned_t start_pfn; ++ XEN_GUEST_HANDLE_64(uint64) buffer; ++ /* OUT variables. */ ++ uint64_aligned_t num_pfns; ++}; ++typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t); ++ ++ ++#define XEN_DOMCTL_getpageframeinfo 7 ++ ++#define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28 ++#define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28) ++#define XEN_DOMCTL_PFINFO_L1TAB (0x1U<<28) ++#define XEN_DOMCTL_PFINFO_L2TAB (0x2U<<28) ++#define XEN_DOMCTL_PFINFO_L3TAB (0x3U<<28) ++#define XEN_DOMCTL_PFINFO_L4TAB (0x4U<<28) ++#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28) ++#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31) ++#define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */ ++#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28) ++ ++struct xen_domctl_getpageframeinfo { ++ /* IN variables. */ ++ uint64_aligned_t gmfn; /* GMFN to query */ ++ /* OUT variables. */ ++ /* Is the page PINNED to a type? 
*/ ++ uint32_t type; /* see above type defs */ ++}; ++typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t); ++ ++ ++#define XEN_DOMCTL_getpageframeinfo2 8 ++struct xen_domctl_getpageframeinfo2 { ++ /* IN variables. */ ++ uint64_aligned_t num; ++ /* IN/OUT variables. */ ++ XEN_GUEST_HANDLE_64(uint32) array; ++}; ++typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t); ++ ++ ++/* ++ * Control shadow pagetables operation ++ */ ++#define XEN_DOMCTL_shadow_op 10 ++ ++/* Disable shadow mode. */ ++#define XEN_DOMCTL_SHADOW_OP_OFF 0 ++ ++/* Enable shadow mode (mode contains ORed XEN_DOMCTL_SHADOW_ENABLE_* flags). */ ++#define XEN_DOMCTL_SHADOW_OP_ENABLE 32 ++ ++/* Log-dirty bitmap operations. */ ++ /* Return the bitmap and clean internal copy for next round. */ ++#define XEN_DOMCTL_SHADOW_OP_CLEAN 11 ++ /* Return the bitmap but do not modify internal copy. */ ++#define XEN_DOMCTL_SHADOW_OP_PEEK 12 ++ ++/* Memory allocation accessors. */ ++#define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION 30 ++#define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION 31 ++ ++/* Legacy enable operations. */ ++ /* Equiv. to ENABLE with no mode flags. */ ++#define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST 1 ++ /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */ ++#define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY 2 ++ /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */ ++#define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE 3 ++ ++/* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */ ++ /* ++ * Shadow pagetables are refcounted: guest does not use explicit mmu ++ * operations nor write-protect its pagetables. ++ */ ++#define XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT (1 << 1) ++ /* ++ * Log pages in a bitmap as they are dirtied. ++ * Used for live relocation to determine which pages must be re-sent. 
++ */ ++#define XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY (1 << 2) ++ /* ++ * Automatically translate GPFNs into MFNs. ++ */ ++#define XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE (1 << 3) ++ /* ++ * Xen does not steal virtual address space from the guest. ++ * Requires HVM support. ++ */ ++#define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL (1 << 4) ++ ++struct xen_domctl_shadow_op_stats { ++ uint32_t fault_count; ++ uint32_t dirty_count; ++}; ++typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t); ++ ++struct xen_domctl_shadow_op { ++ /* IN variables. */ ++ uint32_t op; /* XEN_DOMCTL_SHADOW_OP_* */ ++ ++ /* OP_ENABLE */ ++ uint32_t mode; /* XEN_DOMCTL_SHADOW_ENABLE_* */ ++ ++ /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */ ++ uint32_t mb; /* Shadow memory allocation in MB */ ++ ++ /* OP_PEEK / OP_CLEAN */ ++ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; ++ uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */ ++ struct xen_domctl_shadow_op_stats stats; ++}; ++typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t); ++ ++ ++#define XEN_DOMCTL_max_mem 11 ++struct xen_domctl_max_mem { ++ /* IN variables. */ ++ uint64_aligned_t max_memkb; ++}; ++typedef struct xen_domctl_max_mem xen_domctl_max_mem_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t); ++ ++ ++#define XEN_DOMCTL_setvcpucontext 12 ++#define XEN_DOMCTL_getvcpucontext 13 ++struct xen_domctl_vcpucontext { ++ uint32_t vcpu; /* IN */ ++ XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */ ++}; ++typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t); ++ ++ ++#define XEN_DOMCTL_getvcpuinfo 14 ++struct xen_domctl_getvcpuinfo { ++ /* IN variables. */ ++ uint32_t vcpu; ++ /* OUT variables. */ ++ uint8_t online; /* currently online (not hotplugged)? */ ++ uint8_t blocked; /* blocked waiting for an event? 
*/ ++ uint8_t running; /* currently scheduled on its CPU? */ ++ uint64_aligned_t cpu_time; /* total cpu time consumed (ns) */ ++ uint32_t cpu; /* current mapping */ ++}; ++typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t); ++ ++ ++/* Get/set which physical cpus a vcpu can execute on. */ ++#define XEN_DOMCTL_setvcpuaffinity 9 ++#define XEN_DOMCTL_getvcpuaffinity 25 ++struct xen_domctl_vcpuaffinity { ++ uint32_t vcpu; /* IN */ ++ struct xenctl_cpumap cpumap; /* IN/OUT */ ++}; ++typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t); ++ ++ ++#define XEN_DOMCTL_max_vcpus 15 ++struct xen_domctl_max_vcpus { ++ uint32_t max; /* maximum number of vcpus */ ++}; ++typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t); ++ ++ ++#define XEN_DOMCTL_scheduler_op 16 ++/* Scheduler types. */ ++#define XEN_SCHEDULER_SEDF 4 ++#define XEN_SCHEDULER_CREDIT 5 ++/* Set or get info? 
*/ ++#define XEN_DOMCTL_SCHEDOP_putinfo 0 ++#define XEN_DOMCTL_SCHEDOP_getinfo 1 ++struct xen_domctl_scheduler_op { ++ uint32_t sched_id; /* XEN_SCHEDULER_* */ ++ uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */ ++ union { ++ struct xen_domctl_sched_sedf { ++ uint64_aligned_t period; ++ uint64_aligned_t slice; ++ uint64_aligned_t latency; ++ uint32_t extratime; ++ uint32_t weight; ++ } sedf; ++ struct xen_domctl_sched_credit { ++ uint16_t weight; ++ uint16_t cap; ++ } credit; ++ } u; ++}; ++typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t); ++ ++ ++#define XEN_DOMCTL_setdomainhandle 17 ++struct xen_domctl_setdomainhandle { ++ xen_domain_handle_t handle; ++}; ++typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t); ++ ++ ++#define XEN_DOMCTL_setdebugging 18 ++struct xen_domctl_setdebugging { ++ uint8_t enable; ++}; ++typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t); ++ ++ ++#define XEN_DOMCTL_irq_permission 19 ++struct xen_domctl_irq_permission { ++ uint8_t pirq; ++ uint8_t allow_access; /* flag to specify enable/disable of IRQ access */ ++}; ++typedef struct xen_domctl_irq_permission xen_domctl_irq_permission_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t); ++ ++ ++#define XEN_DOMCTL_iomem_permission 20 ++struct xen_domctl_iomem_permission { ++ uint64_aligned_t first_mfn;/* first page (physical page number) in range */ ++ uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ ++ uint8_t allow_access; /* allow (!0) or deny (0) access to range? 
*/ ++}; ++typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t); ++ ++ ++#define XEN_DOMCTL_ioport_permission 21 ++struct xen_domctl_ioport_permission { ++ uint32_t first_port; /* first port in range */ ++ uint32_t nr_ports; /* size of port range */ ++ uint8_t allow_access; /* allow or deny access to range? */ ++}; ++typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t); ++ ++ ++#define XEN_DOMCTL_hypercall_init 22 ++struct xen_domctl_hypercall_init { ++ uint64_aligned_t gmfn; /* GMFN to be initialised */ ++}; ++typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t); ++ ++ ++#define XEN_DOMCTL_arch_setup 23 ++#define _XEN_DOMAINSETUP_hvm_guest 0 ++#define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest) ++#define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */ ++#define XEN_DOMAINSETUP_query (1UL<<_XEN_DOMAINSETUP_query) ++#define _XEN_DOMAINSETUP_sioemu_guest 2 ++#define XEN_DOMAINSETUP_sioemu_guest (1UL<<_XEN_DOMAINSETUP_sioemu_guest) ++typedef struct xen_domctl_arch_setup { ++ uint64_aligned_t flags; /* XEN_DOMAINSETUP_* */ ++#ifdef __ia64__ ++ uint64_aligned_t bp; /* mpaddr of boot param area */ ++ uint64_aligned_t maxmem; /* Highest memory address for MDT. */ ++ uint64_aligned_t xsi_va; /* Xen shared_info area virtual address. */ ++ uint32_t hypercall_imm; /* Break imm for Xen hypercalls. */ ++ int8_t vhpt_size_log2; /* Log2 of VHPT size. 
*/ ++#endif ++} xen_domctl_arch_setup_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t); ++ ++ ++#define XEN_DOMCTL_settimeoffset 24 ++struct xen_domctl_settimeoffset { ++ int32_t time_offset_seconds; /* applied to domain wallclock time */ ++}; ++typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t); ++ ++ ++#define XEN_DOMCTL_gethvmcontext 33 ++#define XEN_DOMCTL_sethvmcontext 34 ++typedef struct xen_domctl_hvmcontext { ++ uint32_t size; /* IN/OUT: size of buffer / bytes filled */ ++ XEN_GUEST_HANDLE_64(uint8) buffer; /* IN/OUT: data, or call ++ * gethvmcontext with NULL ++ * buffer to get size req'd */ ++} xen_domctl_hvmcontext_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t); ++ ++ ++#define XEN_DOMCTL_set_address_size 35 ++#define XEN_DOMCTL_get_address_size 36 ++typedef struct xen_domctl_address_size { ++ uint32_t size; ++} xen_domctl_address_size_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t); ++ ++ ++#define XEN_DOMCTL_real_mode_area 26 ++struct xen_domctl_real_mode_area { ++ uint32_t log; /* log2 of Real Mode Area size */ ++}; ++typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); ++ ++ ++#define XEN_DOMCTL_sendtrigger 28 ++#define XEN_DOMCTL_SENDTRIGGER_NMI 0 ++#define XEN_DOMCTL_SENDTRIGGER_RESET 1 ++#define XEN_DOMCTL_SENDTRIGGER_INIT 2 ++struct xen_domctl_sendtrigger { ++ uint32_t trigger; /* IN */ ++ uint32_t vcpu; /* IN */ ++}; ++typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t); ++ ++ ++/* Assign PCI device to HVM guest. Sets up IOMMU structures. 
*/ ++#define XEN_DOMCTL_assign_device 37 ++#define XEN_DOMCTL_test_assign_device 45 ++#define XEN_DOMCTL_deassign_device 47 ++struct xen_domctl_assign_device { ++ uint32_t machine_bdf; /* machine PCI ID of assigned device */ ++}; ++typedef struct xen_domctl_assign_device xen_domctl_assign_device_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t); ++ ++/* Retrieve sibling devices information of machine_bdf */ ++#define XEN_DOMCTL_get_device_group 50 ++struct xen_domctl_get_device_group { ++ uint32_t machine_bdf; /* IN */ ++ uint32_t max_sdevs; /* IN */ ++ uint32_t num_sdevs; /* OUT */ ++ XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */ ++}; ++typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t); ++ ++/* Pass-through interrupts: bind real irq -> hvm devfn. */ ++#define XEN_DOMCTL_bind_pt_irq 38 ++#define XEN_DOMCTL_unbind_pt_irq 48 ++typedef enum pt_irq_type_e { ++ PT_IRQ_TYPE_PCI, ++ PT_IRQ_TYPE_ISA, ++ PT_IRQ_TYPE_MSI, ++} pt_irq_type_t; ++struct xen_domctl_bind_pt_irq { ++ uint32_t machine_irq; ++ pt_irq_type_t irq_type; ++ uint32_t hvm_domid; ++ ++ union { ++ struct { ++ uint8_t isa_irq; ++ } isa; ++ struct { ++ uint8_t bus; ++ uint8_t device; ++ uint8_t intx; ++ } pci; ++ struct { ++ uint8_t gvec; ++ uint32_t gflags; ++ } msi; ++ } u; ++}; ++typedef struct xen_domctl_bind_pt_irq xen_domctl_bind_pt_irq_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t); ++ ++ ++/* Bind machine I/O address range -> HVM address range. 
*/ ++#define XEN_DOMCTL_memory_mapping 39 ++#define DPCI_ADD_MAPPING 1 ++#define DPCI_REMOVE_MAPPING 0 ++struct xen_domctl_memory_mapping { ++ uint64_aligned_t first_gfn; /* first page (hvm guest phys page) in range */ ++ uint64_aligned_t first_mfn; /* first page (machine page) in range */ ++ uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ ++ uint32_t add_mapping; /* add or remove mapping */ ++ uint32_t padding; /* padding for 64-bit aligned structure */ ++}; ++typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t); ++ ++ ++/* Bind machine I/O port range -> HVM I/O port range. */ ++#define XEN_DOMCTL_ioport_mapping 40 ++struct xen_domctl_ioport_mapping { ++ uint32_t first_gport; /* first guest IO port*/ ++ uint32_t first_mport; /* first machine IO port */ ++ uint32_t nr_ports; /* size of port range */ ++ uint32_t add_mapping; /* add or remove mapping */ ++}; ++typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t); ++ ++ ++/* ++ * Pin caching type of RAM space for x86 HVM domU. ++ */ ++#define XEN_DOMCTL_pin_mem_cacheattr 41 ++/* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */ ++#define XEN_DOMCTL_MEM_CACHEATTR_UC 0 ++#define XEN_DOMCTL_MEM_CACHEATTR_WC 1 ++#define XEN_DOMCTL_MEM_CACHEATTR_WT 4 ++#define XEN_DOMCTL_MEM_CACHEATTR_WP 5 ++#define XEN_DOMCTL_MEM_CACHEATTR_WB 6 ++#define XEN_DOMCTL_MEM_CACHEATTR_UCM 7 ++struct xen_domctl_pin_mem_cacheattr { ++ uint64_aligned_t start, end; ++ unsigned int type; /* XEN_DOMCTL_MEM_CACHEATTR_* */ ++}; ++typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t); ++ ++ ++#define XEN_DOMCTL_set_ext_vcpucontext 42 ++#define XEN_DOMCTL_get_ext_vcpucontext 43 ++struct xen_domctl_ext_vcpucontext { ++ /* IN: VCPU that this call applies to. 
*/ ++ uint32_t vcpu; ++ /* ++ * SET: Size of struct (IN) ++ * GET: Size of struct (OUT) ++ */ ++ uint32_t size; ++#if defined(__i386__) || defined(__x86_64__) ++ /* SYSCALL from 32-bit mode and SYSENTER callback information. */ ++ /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */ ++ uint64_aligned_t syscall32_callback_eip; ++ uint64_aligned_t sysenter_callback_eip; ++ uint16_t syscall32_callback_cs; ++ uint16_t sysenter_callback_cs; ++ uint8_t syscall32_disables_events; ++ uint8_t sysenter_disables_events; ++#endif ++}; ++typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t); ++ ++/* ++ * Set optimization features for a domain ++ */ ++#define XEN_DOMCTL_set_opt_feature 44 ++struct xen_domctl_set_opt_feature { ++#if defined(__ia64__) ++ struct xen_ia64_opt_feature optf; ++#else ++ /* Make struct non-empty: do not depend on this field name! */ ++ uint64_t dummy; ++#endif ++}; ++typedef struct xen_domctl_set_opt_feature xen_domctl_set_opt_feature_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_opt_feature_t); ++ ++/* ++ * Set the target domain for a domain ++ */ ++#define XEN_DOMCTL_set_target 46 ++struct xen_domctl_set_target { ++ domid_t target; ++}; ++typedef struct xen_domctl_set_target xen_domctl_set_target_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t); ++ ++#if defined(__i386__) || defined(__x86_64__) ++# define XEN_CPUID_INPUT_UNUSED 0xFFFFFFFF ++# define XEN_DOMCTL_set_cpuid 49 ++struct xen_domctl_cpuid { ++ unsigned int input[2]; ++ unsigned int eax; ++ unsigned int ebx; ++ unsigned int ecx; ++ unsigned int edx; ++}; ++typedef struct xen_domctl_cpuid xen_domctl_cpuid_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t); ++#endif ++ ++#define XEN_DOMCTL_subscribe 29 ++struct xen_domctl_subscribe { ++ uint32_t port; /* IN */ ++}; ++typedef struct xen_domctl_subscribe xen_domctl_subscribe_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_subscribe_t); ++ ++/* ++ * Define the 
maximum machine address size which should be allocated ++ * to a guest. ++ */ ++#define XEN_DOMCTL_set_machine_address_size 51 ++#define XEN_DOMCTL_get_machine_address_size 52 ++ ++/* ++ * Do not inject spurious page faults into this domain. ++ */ ++#define XEN_DOMCTL_suppress_spurious_page_faults 53 ++ ++struct xen_domctl { ++ uint32_t cmd; ++ uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ ++ domid_t domain; ++ union { ++ struct xen_domctl_createdomain createdomain; ++ struct xen_domctl_getdomaininfo getdomaininfo; ++ struct xen_domctl_getmemlist getmemlist; ++ struct xen_domctl_getpageframeinfo getpageframeinfo; ++ struct xen_domctl_getpageframeinfo2 getpageframeinfo2; ++ struct xen_domctl_vcpuaffinity vcpuaffinity; ++ struct xen_domctl_shadow_op shadow_op; ++ struct xen_domctl_max_mem max_mem; ++ struct xen_domctl_vcpucontext vcpucontext; ++ struct xen_domctl_getvcpuinfo getvcpuinfo; ++ struct xen_domctl_max_vcpus max_vcpus; ++ struct xen_domctl_scheduler_op scheduler_op; ++ struct xen_domctl_setdomainhandle setdomainhandle; ++ struct xen_domctl_setdebugging setdebugging; ++ struct xen_domctl_irq_permission irq_permission; ++ struct xen_domctl_iomem_permission iomem_permission; ++ struct xen_domctl_ioport_permission ioport_permission; ++ struct xen_domctl_hypercall_init hypercall_init; ++ struct xen_domctl_arch_setup arch_setup; ++ struct xen_domctl_settimeoffset settimeoffset; ++ struct xen_domctl_real_mode_area real_mode_area; ++ struct xen_domctl_hvmcontext hvmcontext; ++ struct xen_domctl_address_size address_size; ++ struct xen_domctl_sendtrigger sendtrigger; ++ struct xen_domctl_get_device_group get_device_group; ++ struct xen_domctl_assign_device assign_device; ++ struct xen_domctl_bind_pt_irq bind_pt_irq; ++ struct xen_domctl_memory_mapping memory_mapping; ++ struct xen_domctl_ioport_mapping ioport_mapping; ++ struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr; ++ struct xen_domctl_ext_vcpucontext ext_vcpucontext; ++ struct 
xen_domctl_set_opt_feature set_opt_feature; ++ struct xen_domctl_set_target set_target; ++ struct xen_domctl_subscribe subscribe; ++#if defined(__i386__) || defined(__x86_64__) ++ struct xen_domctl_cpuid cpuid; ++#endif ++ uint8_t pad[128]; ++ } u; ++}; ++typedef struct xen_domctl xen_domctl_t; ++DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); ++ ++#endif /* __XEN_PUBLIC_DOMCTL_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/hvm/e820.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/hvm/e820.h 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,34 @@ ++ ++/* ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#ifndef __XEN_PUBLIC_HVM_E820_H__ ++#define __XEN_PUBLIC_HVM_E820_H__ ++ ++/* E820 location in HVM virtual address space. */ ++#define HVM_E820_PAGE 0x00090000 ++#define HVM_E820_NR_OFFSET 0x000001E8 ++#define HVM_E820_OFFSET 0x000002D0 ++ ++#define HVM_BELOW_4G_RAM_END 0xF0000000 ++#define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END ++#define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START) ++ ++#endif /* __XEN_PUBLIC_HVM_E820_H__ */ +Index: head-2008-11-25/include/xen/interface/hvm/hvm_info_table.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/hvm/hvm_info_table.h 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,41 @@ ++/****************************************************************************** ++ * hvm/hvm_info_table.h ++ * ++ * HVM parameter and information table, written into guest memory map. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ ++#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ ++ ++#define HVM_INFO_PFN 0x09F ++#define HVM_INFO_OFFSET 0x800 ++#define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET) ++ ++struct hvm_info_table { ++ char signature[8]; /* "HVM INFO" */ ++ uint32_t length; ++ uint8_t checksum; ++ uint8_t acpi_enabled; ++ uint8_t apic_mode; ++ uint32_t nr_vcpus; ++}; ++ ++#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */ +Index: head-2008-11-25/include/xen/interface/hvm/hvm_op.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/hvm/hvm_op.h 2008-09-01 12:07:31.000000000 +0200 +@@ -0,0 +1,131 @@ ++/* ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ ++#define __XEN_PUBLIC_HVM_HVM_OP_H__ ++ ++/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */ ++#define HVMOP_set_param 0 ++#define HVMOP_get_param 1 ++struct xen_hvm_param { ++ domid_t domid; /* IN */ ++ uint32_t index; /* IN */ ++ uint64_t value; /* IN/OUT */ ++}; ++typedef struct xen_hvm_param xen_hvm_param_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t); ++ ++/* Set the logical level of one of a domain's PCI INTx wires. */ ++#define HVMOP_set_pci_intx_level 2 ++struct xen_hvm_set_pci_intx_level { ++ /* Domain to be updated. */ ++ domid_t domid; ++ /* PCI INTx identification in PCI topology (domain:bus:device:intx). */ ++ uint8_t domain, bus, device, intx; ++ /* Assertion level (0 = unasserted, 1 = asserted). */ ++ uint8_t level; ++}; ++typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t); ++ ++/* Set the logical level of one of a domain's ISA IRQ wires. */ ++#define HVMOP_set_isa_irq_level 3 ++struct xen_hvm_set_isa_irq_level { ++ /* Domain to be updated. */ ++ domid_t domid; ++ /* ISA device identification, by ISA IRQ (0-15). */ ++ uint8_t isa_irq; ++ /* Assertion level (0 = unasserted, 1 = asserted). */ ++ uint8_t level; ++}; ++typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t); ++ ++#define HVMOP_set_pci_link_route 4 ++struct xen_hvm_set_pci_link_route { ++ /* Domain to be updated. */ ++ domid_t domid; ++ /* PCI link identifier (0-3). */ ++ uint8_t link; ++ /* ISA IRQ (1-15), or 0 (disable link). 
*/ ++ uint8_t isa_irq; ++}; ++typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); ++ ++/* Flushes all VCPU TLBs: @arg must be NULL. */ ++#define HVMOP_flush_tlbs 5 ++ ++/* Following tools-only interfaces may change in future. */ ++#if defined(__XEN__) || defined(__XEN_TOOLS__) ++ ++/* Track dirty VRAM. */ ++#define HVMOP_track_dirty_vram 6 ++struct xen_hvm_track_dirty_vram { ++ /* Domain to be tracked. */ ++ domid_t domid; ++ /* First pfn to track. */ ++ uint64_aligned_t first_pfn; ++ /* Number of pages to track. */ ++ uint64_aligned_t nr; ++ /* OUT variable. */ ++ /* Dirty bitmap buffer. */ ++ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; ++}; ++typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t); ++ ++/* Notify that some pages got modified by the Device Model. */ ++#define HVMOP_modified_memory 7 ++struct xen_hvm_modified_memory { ++ /* Domain to be updated. */ ++ domid_t domid; ++ /* First pfn. */ ++ uint64_aligned_t first_pfn; ++ /* Number of pages. */ ++ uint64_aligned_t nr; ++}; ++typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); ++ ++#define HVMOP_set_mem_type 8 ++typedef enum { ++ HVMMEM_ram_rw, /* Normal read/write guest RAM */ ++ HVMMEM_ram_ro, /* Read-only; writes are discarded */ ++ HVMMEM_mmio_dm, /* Reads and write go to the device model */ ++} hvmmem_type_t; ++/* Notify that a region of memory is to be treated in a specific way. */ ++struct xen_hvm_set_mem_type { ++ /* Domain to be updated. */ ++ domid_t domid; ++ /* Memory type */ ++ hvmmem_type_t hvmmem_type; ++ /* First pfn. */ ++ uint64_aligned_t first_pfn; ++ /* Number of pages. 
*/ ++ uint64_aligned_t nr; ++}; ++typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t; ++DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t); ++ ++ ++#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ ++ ++#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ +Index: head-2008-11-25/include/xen/interface/hvm/ioreq.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/hvm/ioreq.h 2008-04-02 12:34:02.000000000 +0200 +@@ -0,0 +1,127 @@ ++/* ++ * ioreq.h: I/O request definitions for device models ++ * Copyright (c) 2004, Intel Corporation. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#ifndef _IOREQ_H_ ++#define _IOREQ_H_ ++ ++#define IOREQ_READ 1 ++#define IOREQ_WRITE 0 ++ ++#define STATE_IOREQ_NONE 0 ++#define STATE_IOREQ_READY 1 ++#define STATE_IOREQ_INPROCESS 2 ++#define STATE_IORESP_READY 3 ++ ++#define IOREQ_TYPE_PIO 0 /* pio */ ++#define IOREQ_TYPE_COPY 1 /* mmio ops */ ++#define IOREQ_TYPE_TIMEOFFSET 7 ++#define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ ++ ++/* ++ * VMExit dispatcher should cooperate with instruction decoder to ++ * prepare this structure and notify service OS and DM by sending ++ * virq ++ */ ++struct ioreq { ++ uint64_t addr; /* physical address */ ++ uint64_t size; /* size in bytes */ ++ uint64_t count; /* for rep prefixes */ ++ uint64_t data; /* data (or paddr of data) */ ++ uint8_t state:4; ++ uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr ++ * of the real data to use. */ ++ uint8_t dir:1; /* 1=read, 0=write */ ++ uint8_t df:1; ++ uint8_t pad:1; ++ uint8_t type; /* I/O type */ ++ uint8_t _pad0[6]; ++ uint64_t io_count; /* How many IO done on a vcpu */ ++}; ++typedef struct ioreq ioreq_t; ++ ++struct vcpu_iodata { ++ struct ioreq vp_ioreq; ++ /* Event channel port, used for notifications to/from the device model. */ ++ uint32_t vp_eport; ++ uint32_t _pad0; ++}; ++typedef struct vcpu_iodata vcpu_iodata_t; ++ ++struct shared_iopage { ++ struct vcpu_iodata vcpu_iodata[1]; ++}; ++typedef struct shared_iopage shared_iopage_t; ++ ++struct buf_ioreq { ++ uint8_t type; /* I/O type */ ++ uint8_t pad:1; ++ uint8_t dir:1; /* 1=read, 0=write */ ++ uint8_t size:2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two buf_ioreqs */ ++ uint32_t addr:20;/* physical address */ ++ uint32_t data; /* data */ ++}; ++typedef struct buf_ioreq buf_ioreq_t; ++ ++#define IOREQ_BUFFER_SLOT_NUM 511 /* 8 bytes each, plus 2 4-byte indexes */ ++struct buffered_iopage { ++ unsigned int read_pointer; ++ unsigned int write_pointer; ++ buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM]; ++}; /* NB. 
Size of this structure must be no greater than one page. */ ++typedef struct buffered_iopage buffered_iopage_t; ++ ++#if defined(__ia64__) ++struct pio_buffer { ++ uint32_t page_offset; ++ uint32_t pointer; ++ uint32_t data_end; ++ uint32_t buf_size; ++ void *opaque; ++}; ++ ++#define PIO_BUFFER_IDE_PRIMARY 0 /* I/O port = 0x1F0 */ ++#define PIO_BUFFER_IDE_SECONDARY 1 /* I/O port = 0x170 */ ++#define PIO_BUFFER_ENTRY_NUM 2 ++struct buffered_piopage { ++ struct pio_buffer pio[PIO_BUFFER_ENTRY_NUM]; ++ uint8_t buffer[1]; ++}; ++#endif /* defined(__ia64__) */ ++ ++#define ACPI_PM1A_EVT_BLK_ADDRESS 0x0000000000001f40 ++#define ACPI_PM1A_CNT_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04) ++#define ACPI_PM_TMR_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08) ++#define ACPI_GPE0_BLK_ADDRESS (ACPI_PM_TMR_BLK_ADDRESS + 0x20) ++#define ACPI_GPE0_BLK_LEN 0x08 ++ ++#endif /* _IOREQ_H_ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/hvm/params.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/hvm/params.h 2008-10-29 09:55:56.000000000 +0100 +@@ -0,0 +1,105 @@ ++/* ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ ++#define __XEN_PUBLIC_HVM_PARAMS_H__ ++ ++#include "hvm_op.h" ++ ++/* ++ * Parameter space for HVMOP_{set,get}_param. ++ */ ++ ++/* ++ * How should CPU0 event-channel notifications be delivered? ++ * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). ++ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: ++ * Domain = val[47:32], Bus = val[31:16], ++ * DevFn = val[15: 8], IntX = val[ 1: 0] ++ * If val == 0 then CPU0 event-channel notifications are not delivered. ++ */ ++#define HVM_PARAM_CALLBACK_IRQ 0 ++ ++/* ++ * These are not used by Xen. They are here for convenience of HVM-guest ++ * xenbus implementations. ++ */ ++#define HVM_PARAM_STORE_PFN 1 ++#define HVM_PARAM_STORE_EVTCHN 2 ++ ++#define HVM_PARAM_PAE_ENABLED 4 ++ ++#define HVM_PARAM_IOREQ_PFN 5 ++ ++#define HVM_PARAM_BUFIOREQ_PFN 6 ++ ++#ifdef __ia64__ ++ ++#define HVM_PARAM_NVRAM_FD 7 ++#define HVM_PARAM_VHPT_SIZE 8 ++#define HVM_PARAM_BUFPIOREQ_PFN 9 ++ ++#elif defined(__i386__) || defined(__x86_64__) ++ ++/* Expose Viridian interfaces to this HVM guest? */ ++#define HVM_PARAM_VIRIDIAN 9 ++ ++#endif ++ ++/* ++ * Set mode for virtual timers (currently x86 only): ++ * delay_for_missed_ticks (default): ++ * Do not advance a vcpu's time beyond the correct delivery time for ++ * interrupts that have been missed due to preemption. 
Deliver missed ++ * interrupts when the vcpu is rescheduled and advance the vcpu's virtual ++ * time stepwise for each one. ++ * no_delay_for_missed_ticks: ++ * As above, missed interrupts are delivered, but guest time always tracks ++ * wallclock (i.e., real) time while doing so. ++ * no_missed_ticks_pending: ++ * No missed interrupts are held pending. Instead, to ensure ticks are ++ * delivered at some non-zero rate, if we detect missed ticks then the ++ * internal tick alarm is not disabled if the VCPU is preempted during the ++ * next tick period. ++ * one_missed_tick_pending: ++ * Missed interrupts are collapsed together and delivered as one 'late tick'. ++ * Guest time always tracks wallclock (i.e., real) time. ++ */ ++#define HVM_PARAM_TIMER_MODE 10 ++#define HVMPTM_delay_for_missed_ticks 0 ++#define HVMPTM_no_delay_for_missed_ticks 1 ++#define HVMPTM_no_missed_ticks_pending 2 ++#define HVMPTM_one_missed_tick_pending 3 ++ ++/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ ++#define HVM_PARAM_HPET_ENABLED 11 ++ ++/* Identity-map page directory used by Intel EPT when CR0.PG=0. */ ++#define HVM_PARAM_IDENT_PT 12 ++ ++/* Device Model domain, defaults to 0. */ ++#define HVM_PARAM_DM_DOMAIN 13 ++ ++/* ACPI S state: currently support S0 and S3 on x86. */ ++#define HVM_PARAM_ACPI_S_STATE 14 ++ ++#define HVM_NR_PARAMS 15 ++ ++#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ +Index: head-2008-11-25/include/xen/interface/hvm/save.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/hvm/save.h 2008-04-02 12:34:02.000000000 +0200 +@@ -0,0 +1,88 @@ ++/* ++ * hvm/save.h ++ * ++ * Structure definitions for HVM state that is held by Xen and must ++ * be saved along with the domain's memory and device-model state. ++ * ++ * Copyright (c) 2007 XenSource Ltd. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __XEN_PUBLIC_HVM_SAVE_H__ ++#define __XEN_PUBLIC_HVM_SAVE_H__ ++ ++/* ++ * Structures in this header *must* have the same layout in 32bit ++ * and 64bit environments: this means that all fields must be explicitly ++ * sized types and aligned to their sizes, and the structs must be ++ * a multiple of eight bytes long. ++ * ++ * Only the state necessary for saving and restoring (i.e. fields ++ * that are analogous to actual hardware state) should go in this file. ++ * Internal mechanisms should be kept in Xen-private headers. ++ */ ++ ++#if !defined(__GNUC__) || defined(__STRICT_ANSI__) ++#error "Anonymous structs/unions are a GNU extension." 
++#endif ++ ++/* ++ * Each entry is preceded by a descriptor giving its type and length ++ */ ++struct hvm_save_descriptor { ++ uint16_t typecode; /* Used to demux the various types below */ ++ uint16_t instance; /* Further demux within a type */ ++ uint32_t length; /* In bytes, *not* including this descriptor */ ++}; ++ ++ ++/* ++ * Each entry has a datatype associated with it: for example, the CPU state ++ * is saved as a HVM_SAVE_TYPE(CPU), which has HVM_SAVE_LENGTH(CPU), ++ * and is identified by a descriptor with typecode HVM_SAVE_CODE(CPU). ++ * DECLARE_HVM_SAVE_TYPE binds these things together with some type-system ++ * ugliness. ++ */ ++ ++#define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ ++ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; } ++ ++#define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t) ++#define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x))) ++#define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c)) ++ ++ ++/* ++ * The series of save records is teminated by a zero-type, zero-length ++ * descriptor. ++ */ ++ ++struct hvm_save_end {}; ++DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end); ++ ++#if defined(__i386__) || defined(__x86_64__) ++#include "../arch-x86/hvm/save.h" ++#elif defined(__ia64__) ++#include "../arch-ia64/hvm/save.h" ++#else ++#error "unsupported architecture" ++#endif ++ ++#endif /* __XEN_PUBLIC_HVM_SAVE_H__ */ +Index: head-2008-11-25/include/xen/interface/io/fsif.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/io/fsif.h 2008-09-01 12:07:31.000000000 +0200 +@@ -0,0 +1,191 @@ ++/****************************************************************************** ++ * fsif.h ++ * ++ * Interface to FS level split device drivers. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2007, Grzegorz Milos, . 
++ */ ++ ++#ifndef __XEN_PUBLIC_IO_FSIF_H__ ++#define __XEN_PUBLIC_IO_FSIF_H__ ++ ++#include "ring.h" ++#include "../grant_table.h" ++ ++#define REQ_FILE_OPEN 1 ++#define REQ_FILE_CLOSE 2 ++#define REQ_FILE_READ 3 ++#define REQ_FILE_WRITE 4 ++#define REQ_STAT 5 ++#define REQ_FILE_TRUNCATE 6 ++#define REQ_REMOVE 7 ++#define REQ_RENAME 8 ++#define REQ_CREATE 9 ++#define REQ_DIR_LIST 10 ++#define REQ_CHMOD 11 ++#define REQ_FS_SPACE 12 ++#define REQ_FILE_SYNC 13 ++ ++struct fsif_open_request { ++ grant_ref_t gref; ++}; ++ ++struct fsif_close_request { ++ uint32_t fd; ++}; ++ ++struct fsif_read_request { ++ uint32_t fd; ++ int32_t pad; ++ uint64_t len; ++ uint64_t offset; ++ grant_ref_t grefs[1]; /* Variable length */ ++}; ++ ++struct fsif_write_request { ++ uint32_t fd; ++ int32_t pad; ++ uint64_t len; ++ uint64_t offset; ++ grant_ref_t grefs[1]; /* Variable length */ ++}; ++ ++struct fsif_stat_request { ++ uint32_t fd; ++}; ++ ++/* This structure is a copy of some fields from stat structure, returned ++ * via the ring. 
*/ ++struct fsif_stat_response { ++ int32_t stat_mode; ++ uint32_t stat_uid; ++ uint32_t stat_gid; ++ int32_t stat_ret; ++ int64_t stat_size; ++ int64_t stat_atime; ++ int64_t stat_mtime; ++ int64_t stat_ctime; ++}; ++ ++struct fsif_truncate_request { ++ uint32_t fd; ++ int32_t pad; ++ int64_t length; ++}; ++ ++struct fsif_remove_request { ++ grant_ref_t gref; ++}; ++ ++struct fsif_rename_request { ++ uint16_t old_name_offset; ++ uint16_t new_name_offset; ++ grant_ref_t gref; ++}; ++ ++struct fsif_create_request { ++ int8_t directory; ++ int8_t pad; ++ int16_t pad2; ++ int32_t mode; ++ grant_ref_t gref; ++}; ++ ++struct fsif_list_request { ++ uint32_t offset; ++ grant_ref_t gref; ++}; ++ ++#define NR_FILES_SHIFT 0 ++#define NR_FILES_SIZE 16 /* 16 bits for the number of files mask */ ++#define NR_FILES_MASK (((1ULL << NR_FILES_SIZE) - 1) << NR_FILES_SHIFT) ++#define ERROR_SIZE 32 /* 32 bits for the error mask */ ++#define ERROR_SHIFT (NR_FILES_SIZE + NR_FILES_SHIFT) ++#define ERROR_MASK (((1ULL << ERROR_SIZE) - 1) << ERROR_SHIFT) ++#define HAS_MORE_SHIFT (ERROR_SHIFT + ERROR_SIZE) ++#define HAS_MORE_FLAG (1ULL << HAS_MORE_SHIFT) ++ ++struct fsif_chmod_request { ++ uint32_t fd; ++ int32_t mode; ++}; ++ ++struct fsif_space_request { ++ grant_ref_t gref; ++}; ++ ++struct fsif_sync_request { ++ uint32_t fd; ++}; ++ ++ ++/* FS operation request */ ++struct fsif_request { ++ uint8_t type; /* Type of the request */ ++ uint8_t pad; ++ uint16_t id; /* Request ID, copied to the response */ ++ uint32_t pad2; ++ union { ++ struct fsif_open_request fopen; ++ struct fsif_close_request fclose; ++ struct fsif_read_request fread; ++ struct fsif_write_request fwrite; ++ struct fsif_stat_request fstat; ++ struct fsif_truncate_request ftruncate; ++ struct fsif_remove_request fremove; ++ struct fsif_rename_request frename; ++ struct fsif_create_request fcreate; ++ struct fsif_list_request flist; ++ struct fsif_chmod_request fchmod; ++ struct fsif_space_request fspace; ++ struct 
fsif_sync_request fsync; ++ } u; ++}; ++typedef struct fsif_request fsif_request_t; ++ ++/* FS operation response */ ++struct fsif_response { ++ uint16_t id; ++ uint16_t pad1; ++ uint32_t pad2; ++ union { ++ uint64_t ret_val; ++ struct fsif_stat_response fstat; ++ }; ++}; ++ ++typedef struct fsif_response fsif_response_t; ++ ++#define FSIF_RING_ENTRY_SIZE 64 ++ ++#define FSIF_NR_READ_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_read_request)) / \ ++ sizeof(grant_ref_t) + 1) ++#define FSIF_NR_WRITE_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_write_request)) / \ ++ sizeof(grant_ref_t) + 1) ++ ++DEFINE_RING_TYPES(fsif, struct fsif_request, struct fsif_response); ++ ++#define STATE_INITIALISED "init" ++#define STATE_READY "ready" ++ ++ ++ ++#endif +Index: head-2008-11-25/include/xen/interface/io/pciif.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/io/pciif.h 2008-07-21 11:00:33.000000000 +0200 +@@ -0,0 +1,101 @@ ++/* ++ * PCI Backend/Frontend Common Data Structures & Macros ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Author: Ryan Wilson ++ */ ++#ifndef __XEN_PCI_COMMON_H__ ++#define __XEN_PCI_COMMON_H__ ++ ++/* Be sure to bump this number if you change this file */ ++#define XEN_PCI_MAGIC "7" ++ ++/* xen_pci_sharedinfo flags */ ++#define _XEN_PCIF_active (0) ++#define XEN_PCIF_active (1<<_XEN_PCI_active) ++ ++/* xen_pci_op commands */ ++#define XEN_PCI_OP_conf_read (0) ++#define XEN_PCI_OP_conf_write (1) ++#define XEN_PCI_OP_enable_msi (2) ++#define XEN_PCI_OP_disable_msi (3) ++#define XEN_PCI_OP_enable_msix (4) ++#define XEN_PCI_OP_disable_msix (5) ++ ++/* xen_pci_op error numbers */ ++#define XEN_PCI_ERR_success (0) ++#define XEN_PCI_ERR_dev_not_found (-1) ++#define XEN_PCI_ERR_invalid_offset (-2) ++#define XEN_PCI_ERR_access_denied (-3) ++#define XEN_PCI_ERR_not_implemented (-4) ++/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */ ++#define XEN_PCI_ERR_op_failed (-5) ++ ++/* ++ * it should be PAGE_SIZE-sizeof(struct xen_pci_op))/sizeof(struct msix_entry)) ++ * Should not exceed 128 ++ */ ++#define SH_INFO_MAX_VEC 128 ++ ++struct xen_msix_entry { ++ uint16_t vector; ++ uint16_t entry; ++}; ++struct xen_pci_op { ++ /* IN: what action to perform: XEN_PCI_OP_* */ ++ uint32_t cmd; ++ ++ /* OUT: will contain an error number (if any) from errno.h */ ++ int32_t err; ++ ++ /* IN: which device to touch */ ++ uint32_t domain; /* PCI Domain/Segment */ ++ uint32_t bus; ++ uint32_t devfn; ++ ++ /* IN: which configuration registers to touch */ ++ int32_t offset; ++ int32_t size; ++ ++ /* IN/OUT: Contains the result after a READ or the value to WRITE */ ++ uint32_t value; ++ /* IN: Contains extra infor for this operation */ ++ uint32_t info; ++ /*IN: param for msi-x */ ++ struct 
xen_msix_entry msix_entries[SH_INFO_MAX_VEC]; ++}; ++ ++struct xen_pci_sharedinfo { ++ /* flags - XEN_PCIF_* */ ++ uint32_t flags; ++ struct xen_pci_op op; ++}; ++ ++#endif /* __XEN_PCI_COMMON_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/io/tpmif.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/io/tpmif.h 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,77 @@ ++/****************************************************************************** ++ * tpmif.h ++ * ++ * TPM I/O interface for Xen guest OSes. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. 
++ * ++ * Copyright (c) 2005, IBM Corporation ++ * ++ * Author: Stefan Berger, stefanb@us.ibm.com ++ * Grant table support: Mahadevan Gomathisankaran ++ * ++ * This code has been derived from tools/libxc/xen/io/netif.h ++ * ++ * Copyright (c) 2003-2004, Keir Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_IO_TPMIF_H__ ++#define __XEN_PUBLIC_IO_TPMIF_H__ ++ ++#include "../grant_table.h" ++ ++struct tpmif_tx_request { ++ unsigned long addr; /* Machine address of packet. */ ++ grant_ref_t ref; /* grant table access reference */ ++ uint16_t unused; ++ uint16_t size; /* Packet size in bytes. */ ++}; ++typedef struct tpmif_tx_request tpmif_tx_request_t; ++ ++/* ++ * The TPMIF_TX_RING_SIZE defines the number of pages the ++ * front-end and backend can exchange (= size of array). ++ */ ++typedef uint32_t TPMIF_RING_IDX; ++ ++#define TPMIF_TX_RING_SIZE 1 ++ ++/* This structure must fit in a memory page. */ ++ ++struct tpmif_ring { ++ struct tpmif_tx_request req; ++}; ++typedef struct tpmif_ring tpmif_ring_t; ++ ++struct tpmif_tx_interface { ++ struct tpmif_ring ring[TPMIF_TX_RING_SIZE]; ++}; ++typedef struct tpmif_tx_interface tpmif_tx_interface_t; ++ ++#endif ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/io/vscsiif.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/io/vscsiif.h 2008-07-21 11:00:33.000000000 +0200 +@@ -0,0 +1,105 @@ ++/****************************************************************************** ++ * vscsiif.h ++ * ++ * Based on the blkif.h code. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright(c) FUJITSU Limited 2008. ++ */ ++ ++#ifndef __XEN__PUBLIC_IO_SCSI_H__ ++#define __XEN__PUBLIC_IO_SCSI_H__ ++ ++#include "ring.h" ++#include "../grant_table.h" ++ ++/* command between backend and frontend */ ++#define VSCSIIF_ACT_SCSI_CDB 1 /* SCSI CDB command */ ++#define VSCSIIF_ACT_SCSI_ABORT 2 /* SCSI Device(Lun) Abort*/ ++#define VSCSIIF_ACT_SCSI_RESET 3 /* SCSI Device(Lun) Reset*/ ++ ++ ++#define VSCSIIF_BACK_MAX_PENDING_REQS 128 ++ ++/* ++ * Maximum scatter/gather segments per request. ++ * ++ * Considering balance between allocating al least 16 "vscsiif_request" ++ * structures on one page (4096bytes) and number of scatter gather ++ * needed, we decided to use 26 as a magic number. 
++ */ ++#define VSCSIIF_SG_TABLESIZE 26 ++ ++/* ++ * base on linux kernel 2.6.18 ++ */ ++#define VSCSIIF_MAX_COMMAND_SIZE 16 ++#define VSCSIIF_SENSE_BUFFERSIZE 96 ++ ++ ++struct vscsiif_request { ++ uint16_t rqid; /* private guest value, echoed in resp */ ++ uint8_t act; /* command between backend and frontend */ ++ uint8_t cmd_len; ++ ++ uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; ++ uint16_t timeout_per_command; /* The command is issued by twice ++ the value in Backend. */ ++ uint16_t channel, id, lun; ++ uint16_t padding; ++ uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1) ++ DMA_FROM_DEVICE(2) ++ DMA_NONE(3) requests */ ++ uint8_t nr_segments; /* Number of pieces of scatter-gather */ ++ ++ struct scsiif_request_segment { ++ grant_ref_t gref; ++ uint16_t offset; ++ uint16_t length; ++ } seg[VSCSIIF_SG_TABLESIZE]; ++ uint32_t reserved[3]; ++}; ++typedef struct vscsiif_request vscsiif_request_t; ++ ++struct vscsiif_response { ++ uint16_t rqid; ++ uint8_t padding; ++ uint8_t sense_len; ++ uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE]; ++ int32_t rslt; ++ uint32_t residual_len; /* request bufflen - ++ return the value from physical device */ ++ uint32_t reserved[36]; ++}; ++typedef struct vscsiif_response vscsiif_response_t; ++ ++DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response); ++ ++ ++#endif /*__XEN__PUBLIC_IO_SCSI_H__*/ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/kexec.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/kexec.h 2008-11-25 12:22:34.000000000 +0100 +@@ -0,0 +1,168 @@ ++/****************************************************************************** ++ * kexec.h - Public portion ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ 
* of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Xen port written by: ++ * - Simon 'Horms' Horman ++ * - Magnus Damm ++ */ ++ ++#ifndef _XEN_PUBLIC_KEXEC_H ++#define _XEN_PUBLIC_KEXEC_H ++ ++ ++/* This file describes the Kexec / Kdump hypercall interface for Xen. ++ * ++ * Kexec under vanilla Linux allows a user to reboot the physical machine ++ * into a new user-specified kernel. The Xen port extends this idea ++ * to allow rebooting of the machine from dom0. When kexec for dom0 ++ * is used to reboot, both the hypervisor and the domains get replaced ++ * with some other kernel. It is possible to kexec between vanilla ++ * Linux and Xen and back again. Xen to Xen works well too. ++ * ++ * The hypercall interface for kexec can be divided into three main ++ * types of hypercall operations: ++ * ++ * 1) Range information: ++ * This is used by the dom0 kernel to ask the hypervisor about various ++ * address information. 
This information is needed to allow kexec-tools ++ * to fill in the ELF headers for /proc/vmcore properly. ++ * ++ * 2) Load and unload of images: ++ * There are no big surprises here, the kexec binary from kexec-tools ++ * runs in userspace in dom0. The tool loads/unloads data into the ++ * dom0 kernel such as new kernel, initramfs and hypervisor. When ++ * loaded the dom0 kernel performs a load hypercall operation, and ++ * before releasing all page references the dom0 kernel calls unload. ++ * ++ * 3) Kexec operation: ++ * This is used to start a previously loaded kernel. ++ */ ++ ++#include "xen.h" ++ ++#if defined(__i386__) || defined(__x86_64__) ++#define KEXEC_XEN_NO_PAGES 17 ++#endif ++ ++/* ++ * Prototype for this hypercall is: ++ * int kexec_op(int cmd, void *args) ++ * @cmd == KEXEC_CMD_... ++ * KEXEC operation to perform ++ * @args == Operation-specific extra arguments (NULL if none). ++ */ ++ ++/* ++ * Kexec supports two types of operation: ++ * - kexec into a regular kernel, very similar to a standard reboot ++ * - KEXEC_TYPE_DEFAULT is used to specify this type ++ * - kexec into a special "crash kernel", aka kexec-on-panic ++ * - KEXEC_TYPE_CRASH is used to specify this type ++ * - parts of our system may be broken at kexec-on-panic time ++ * - the code should be kept as simple and self-contained as possible ++ */ ++ ++#define KEXEC_TYPE_DEFAULT 0 ++#define KEXEC_TYPE_CRASH 1 ++ ++ ++/* The kexec implementation for Xen allows the user to load two ++ * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH. ++ * All data needed for a kexec reboot is kept in one xen_kexec_image_t ++ * per "instance". The data mainly consists of machine address lists to pages ++ * together with destination addresses. The data in xen_kexec_image_t ++ * is passed to the "code page" which is one page of code that performs ++ * the final relocations before jumping to the new kernel. 
++ */ ++ ++typedef struct xen_kexec_image { ++#if defined(__i386__) || defined(__x86_64__) ++ unsigned long page_list[KEXEC_XEN_NO_PAGES]; ++#endif ++#if defined(__ia64__) ++ unsigned long reboot_code_buffer; ++#endif ++ unsigned long indirection_page; ++ unsigned long start_address; ++} xen_kexec_image_t; ++ ++/* ++ * Perform kexec having previously loaded a kexec or kdump kernel ++ * as appropriate. ++ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] ++ */ ++#define KEXEC_CMD_kexec 0 ++typedef struct xen_kexec_exec { ++ int type; ++} xen_kexec_exec_t; ++ ++/* ++ * Load/Unload kernel image for kexec or kdump. ++ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] ++ * image == relocation information for kexec (ignored for unload) [in] ++ */ ++#define KEXEC_CMD_kexec_load 1 ++#define KEXEC_CMD_kexec_unload 2 ++typedef struct xen_kexec_load { ++ int type; ++ xen_kexec_image_t image; ++} xen_kexec_load_t; ++ ++#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */ ++#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */ ++#define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */ ++#define KEXEC_RANGE_MA_XENHEAP 3 /* machine address and size of xenheap ++ * Note that although this is adjacent ++ * to Xen it exists in a separate EFI ++ * region on ia64, and thus needs to be ++ * inserted into iomem_machine separately */ ++#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* machine address and size of ++ * the ia64_boot_param */ ++#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size of ++ * of the EFI Memory Map */ ++#define KEXEC_RANGE_MA_VMCOREINFO 6 /* machine address and size of vmcoreinfo */ ++ ++/* ++ * Find the address and size of certain memory areas ++ * range == KEXEC_RANGE_... 
[in] ++ * nr == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in] ++ * size == number of bytes reserved in window [out] ++ * start == address of the first byte in the window [out] ++ */ ++#define KEXEC_CMD_kexec_get_range 3 ++typedef struct xen_kexec_range { ++ int range; ++ int nr; ++ unsigned long size; ++ unsigned long start; ++} xen_kexec_range_t; ++ ++#endif /* _XEN_PUBLIC_KEXEC_H */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/nmi.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/nmi.h 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,78 @@ ++/****************************************************************************** ++ * nmi.h ++ * ++ * NMI callback registration and reason codes. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2005, Keir Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_NMI_H__ ++#define __XEN_PUBLIC_NMI_H__ ++ ++/* ++ * NMI reason codes: ++ * Currently these are x86-specific, stored in arch_shared_info.nmi_reason. ++ */ ++ /* I/O-check error reported via ISA port 0x61, bit 6. */ ++#define _XEN_NMIREASON_io_error 0 ++#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error) ++ /* Parity error reported via ISA port 0x61, bit 7. */ ++#define _XEN_NMIREASON_parity_error 1 ++#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error) ++ /* Unknown hardware-generated NMI. */ ++#define _XEN_NMIREASON_unknown 2 ++#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown) ++ ++/* ++ * long nmi_op(unsigned int cmd, void *arg) ++ * NB. All ops return zero on success, else a negative error code. ++ */ ++ ++/* ++ * Register NMI callback for this (calling) VCPU. Currently this only makes ++ * sense for domain 0, vcpu 0. All other callers will be returned EINVAL. ++ * arg == pointer to xennmi_callback structure. ++ */ ++#define XENNMI_register_callback 0 ++struct xennmi_callback { ++ unsigned long handler_address; ++ unsigned long pad; ++}; ++typedef struct xennmi_callback xennmi_callback_t; ++DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t); ++ ++/* ++ * Deregister NMI callback for this (calling) VCPU. ++ * arg == NULL. 
++ */ ++#define XENNMI_unregister_callback 1 ++ ++#endif /* __XEN_PUBLIC_NMI_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/platform.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/platform.h 2008-09-25 13:55:33.000000000 +0200 +@@ -0,0 +1,346 @@ ++/****************************************************************************** ++ * platform.h ++ * ++ * Hardware platform operations. Intended for use by domain-0 kernel. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. 
++ * ++ * Copyright (c) 2002-2006, K Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_PLATFORM_H__ ++#define __XEN_PUBLIC_PLATFORM_H__ ++ ++#include "xen.h" ++ ++#define XENPF_INTERFACE_VERSION 0x03000001 ++ ++/* ++ * Set clock such that it would read after 00:00:00 UTC, ++ * 1 January, 1970 if the current system time was . ++ */ ++#define XENPF_settime 17 ++struct xenpf_settime { ++ /* IN variables. */ ++ uint32_t secs; ++ uint32_t nsecs; ++ uint64_t system_time; ++}; ++typedef struct xenpf_settime xenpf_settime_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t); ++ ++/* ++ * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type. ++ * On x86, @type is an architecture-defined MTRR memory type. ++ * On success, returns the MTRR that was used (@reg) and a handle that can ++ * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting. ++ * (x86-specific). ++ */ ++#define XENPF_add_memtype 31 ++struct xenpf_add_memtype { ++ /* IN variables. */ ++ xen_pfn_t mfn; ++ uint64_t nr_mfns; ++ uint32_t type; ++ /* OUT variables. */ ++ uint32_t handle; ++ uint32_t reg; ++}; ++typedef struct xenpf_add_memtype xenpf_add_memtype_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_add_memtype_t); ++ ++/* ++ * Tear down an existing memory-range type. If @handle is remembered then it ++ * should be passed in to accurately tear down the correct setting (in case ++ * of overlapping memory regions with differing types). If it is not known ++ * then @handle should be set to zero. In all cases @reg must be set. ++ * (x86-specific). ++ */ ++#define XENPF_del_memtype 32 ++struct xenpf_del_memtype { ++ /* IN variables. */ ++ uint32_t handle; ++ uint32_t reg; ++}; ++typedef struct xenpf_del_memtype xenpf_del_memtype_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_del_memtype_t); ++ ++/* Read current type of an MTRR (x86-specific). */ ++#define XENPF_read_memtype 33 ++struct xenpf_read_memtype { ++ /* IN variables. */ ++ uint32_t reg; ++ /* OUT variables. 
*/ ++ xen_pfn_t mfn; ++ uint64_t nr_mfns; ++ uint32_t type; ++}; ++typedef struct xenpf_read_memtype xenpf_read_memtype_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_read_memtype_t); ++ ++#define XENPF_microcode_update 35 ++struct xenpf_microcode_update { ++ /* IN variables. */ ++ XEN_GUEST_HANDLE(const_void) data;/* Pointer to microcode data */ ++ uint32_t length; /* Length of microcode data. */ ++}; ++typedef struct xenpf_microcode_update xenpf_microcode_update_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_microcode_update_t); ++ ++#define XENPF_platform_quirk 39 ++#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */ ++#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */ ++#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */ ++struct xenpf_platform_quirk { ++ /* IN variables. */ ++ uint32_t quirk_id; ++}; ++typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t); ++ ++#define XENPF_firmware_info 50 ++#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ ++#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ ++#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ ++struct xenpf_firmware_info { ++ /* IN variables. */ ++ uint32_t type; ++ uint32_t index; ++ /* OUT variables. */ ++ union { ++ struct { ++ /* Int13, Fn48: Check Extensions Present. */ ++ uint8_t device; /* %dl: bios device number */ ++ uint8_t version; /* %ah: major version */ ++ uint16_t interface_support; /* %cx: support bitmap */ ++ /* Int13, Fn08: Legacy Get Device Parameters. */ ++ uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */ ++ uint8_t legacy_max_head; /* %dh: max head # */ ++ uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */ ++ /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ ++ /* NB. First uint16_t of buffer must be set to buffer size. 
*/ ++ XEN_GUEST_HANDLE(void) edd_params; ++ } disk_info; /* XEN_FW_DISK_INFO */ ++ struct { ++ uint8_t device; /* bios device number */ ++ uint32_t mbr_signature; /* offset 0x1b8 in mbr */ ++ } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ ++ struct { ++ /* Int10, AX=4F15: Get EDID info. */ ++ uint8_t capabilities; ++ uint8_t edid_transfer_time; ++ /* must refer to 128-byte buffer */ ++ XEN_GUEST_HANDLE(uint8) edid; ++ } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ ++ } u; ++}; ++typedef struct xenpf_firmware_info xenpf_firmware_info_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t); ++ ++#define XENPF_enter_acpi_sleep 51 ++struct xenpf_enter_acpi_sleep { ++ /* IN variables */ ++ uint16_t pm1a_cnt_val; /* PM1a control value. */ ++ uint16_t pm1b_cnt_val; /* PM1b control value. */ ++ uint32_t sleep_state; /* Which state to enter (Sn). */ ++ uint32_t flags; /* Must be zero. */ ++}; ++typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t); ++ ++#define XENPF_change_freq 52 ++struct xenpf_change_freq { ++ /* IN variables */ ++ uint32_t flags; /* Must be zero. */ ++ uint32_t cpu; /* Physical cpu. */ ++ uint64_t freq; /* New frequency (Hz). */ ++}; ++typedef struct xenpf_change_freq xenpf_change_freq_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t); ++ ++/* ++ * Get idle times (nanoseconds since boot) for physical CPUs specified in the ++ * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is ++ * indexed by CPU number; only entries with the corresponding @cpumap_bitmap ++ * bit set are written to. On return, @cpumap_bitmap is modified so that any ++ * non-existent CPUs are cleared. Such CPUs have their @idletime array entry ++ * cleared. ++ */ ++#define XENPF_getidletime 53 ++struct xenpf_getidletime { ++ /* IN/OUT variables */ ++ /* IN: CPUs to interrogate; OUT: subset of IN which are present */ ++ XEN_GUEST_HANDLE(uint8) cpumap_bitmap; ++ /* IN variables */ ++ /* Size of cpumap bitmap. 
*/ ++ uint32_t cpumap_nr_cpus; ++ /* Must be indexable for every cpu in cpumap_bitmap. */ ++ XEN_GUEST_HANDLE(uint64) idletime; ++ /* OUT variables */ ++ /* System time when the idletime snapshots were taken. */ ++ uint64_t now; ++}; ++typedef struct xenpf_getidletime xenpf_getidletime_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t); ++ ++#define XENPF_set_processor_pminfo 54 ++ ++/* ability bits */ ++#define XEN_PROCESSOR_PM_CX 1 ++#define XEN_PROCESSOR_PM_PX 2 ++#define XEN_PROCESSOR_PM_TX 4 ++ ++/* cmd type */ ++#define XEN_PM_CX 0 ++#define XEN_PM_PX 1 ++#define XEN_PM_TX 2 ++ ++/* Px sub info type */ ++#define XEN_PX_PCT 1 ++#define XEN_PX_PSS 2 ++#define XEN_PX_PPC 4 ++#define XEN_PX_PSD 8 ++ ++struct xen_power_register { ++ uint32_t space_id; ++ uint32_t bit_width; ++ uint32_t bit_offset; ++ uint32_t access_size; ++ uint64_t address; ++}; ++ ++struct xen_processor_csd { ++ uint32_t domain; /* domain number of one dependent group */ ++ uint32_t coord_type; /* coordination type */ ++ uint32_t num; /* number of processors in same domain */ ++}; ++typedef struct xen_processor_csd xen_processor_csd_t; ++DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t); ++ ++struct xen_processor_cx { ++ struct xen_power_register reg; /* GAS for Cx trigger register */ ++ uint8_t type; /* cstate value, c0: 0, c1: 1, ... 
*/ ++ uint32_t latency; /* worst latency (ms) to enter/exit this cstate */ ++ uint32_t power; /* average power consumption(mW) */ ++ uint32_t dpcnt; /* number of dependency entries */ ++ XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */ ++}; ++typedef struct xen_processor_cx xen_processor_cx_t; ++DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t); ++ ++struct xen_processor_flags { ++ uint32_t bm_control:1; ++ uint32_t bm_check:1; ++ uint32_t has_cst:1; ++ uint32_t power_setup_done:1; ++ uint32_t bm_rld_set:1; ++}; ++ ++struct xen_processor_power { ++ uint32_t count; /* number of C state entries in array below */ ++ struct xen_processor_flags flags; /* global flags of this processor */ ++ XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */ ++}; ++ ++struct xen_pct_register { ++ uint8_t descriptor; ++ uint16_t length; ++ uint8_t space_id; ++ uint8_t bit_width; ++ uint8_t bit_offset; ++ uint8_t reserved; ++ uint64_t address; ++}; ++ ++struct xen_processor_px { ++ uint64_t core_frequency; /* megahertz */ ++ uint64_t power; /* milliWatts */ ++ uint64_t transition_latency; /* microseconds */ ++ uint64_t bus_master_latency; /* microseconds */ ++ uint64_t control; /* control value */ ++ uint64_t status; /* success indicator */ ++}; ++typedef struct xen_processor_px xen_processor_px_t; ++DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t); ++ ++struct xen_psd_package { ++ uint64_t num_entries; ++ uint64_t revision; ++ uint64_t domain; ++ uint64_t coord_type; ++ uint64_t num_processors; ++}; ++ ++struct xen_processor_performance { ++ uint32_t flags; /* flag for Px sub info type */ ++ uint32_t platform_limit; /* Platform limitation on freq usage */ ++ struct xen_pct_register control_register; ++ struct xen_pct_register status_register; ++ uint32_t state_count; /* total available performance states */ ++ XEN_GUEST_HANDLE(xen_processor_px_t) states; ++ struct xen_psd_package domain_info; ++ uint32_t shared_type; /* coordination type of this processor */ 
++}; ++typedef struct xen_processor_performance xen_processor_performance_t; ++DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t); ++ ++struct xenpf_set_processor_pminfo { ++ /* IN variables */ ++ uint32_t id; /* ACPI CPU ID */ ++ uint32_t type; /* {XEN_PM_CX, XEN_PM_PX} */ ++ union { ++ struct xen_processor_power power;/* Cx: _CST/_CSD */ ++ struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ ++ }; ++}; ++typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t); ++ ++struct xen_platform_op { ++ uint32_t cmd; ++ uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ ++ union { ++ struct xenpf_settime settime; ++ struct xenpf_add_memtype add_memtype; ++ struct xenpf_del_memtype del_memtype; ++ struct xenpf_read_memtype read_memtype; ++ struct xenpf_microcode_update microcode; ++ struct xenpf_platform_quirk platform_quirk; ++ struct xenpf_firmware_info firmware_info; ++ struct xenpf_enter_acpi_sleep enter_acpi_sleep; ++ struct xenpf_change_freq change_freq; ++ struct xenpf_getidletime getidletime; ++ struct xenpf_set_processor_pminfo set_pminfo; ++ uint8_t pad[128]; ++ } u; ++}; ++typedef struct xen_platform_op xen_platform_op_t; ++DEFINE_XEN_GUEST_HANDLE(xen_platform_op_t); ++ ++#endif /* __XEN_PUBLIC_PLATFORM_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/sysctl.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/sysctl.h 2008-09-25 13:55:33.000000000 +0200 +@@ -0,0 +1,308 @@ ++/****************************************************************************** ++ * sysctl.h ++ * ++ * System management operations. For use by node control stack. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (c) 2002-2006, K Fraser ++ */ ++ ++#ifndef __XEN_PUBLIC_SYSCTL_H__ ++#define __XEN_PUBLIC_SYSCTL_H__ ++ ++#if !defined(__XEN__) && !defined(__XEN_TOOLS__) ++#error "sysctl operations are intended for use by node control tools only" ++#endif ++ ++#include "xen.h" ++#include "domctl.h" ++ ++#define XEN_SYSCTL_INTERFACE_VERSION 0x00000006 ++ ++/* ++ * Read console content from Xen buffer ring. ++ */ ++#define XEN_SYSCTL_readconsole 1 ++struct xen_sysctl_readconsole { ++ /* IN: Non-zero -> clear after reading. */ ++ uint8_t clear; ++ /* IN: Non-zero -> start index specified by @index field. */ ++ uint8_t incremental; ++ uint8_t pad0, pad1; ++ /* ++ * IN: Start index for consuming from ring buffer (if @incremental); ++ * OUT: End index after consuming from ring buffer. ++ */ ++ uint32_t index; ++ /* IN: Virtual address to write console data. 
*/ ++ XEN_GUEST_HANDLE_64(char) buffer; ++ /* IN: Size of buffer; OUT: Bytes written to buffer. */ ++ uint32_t count; ++}; ++typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t); ++ ++/* Get trace buffers machine base address */ ++#define XEN_SYSCTL_tbuf_op 2 ++struct xen_sysctl_tbuf_op { ++ /* IN variables */ ++#define XEN_SYSCTL_TBUFOP_get_info 0 ++#define XEN_SYSCTL_TBUFOP_set_cpu_mask 1 ++#define XEN_SYSCTL_TBUFOP_set_evt_mask 2 ++#define XEN_SYSCTL_TBUFOP_set_size 3 ++#define XEN_SYSCTL_TBUFOP_enable 4 ++#define XEN_SYSCTL_TBUFOP_disable 5 ++ uint32_t cmd; ++ /* IN/OUT variables */ ++ struct xenctl_cpumap cpu_mask; ++ uint32_t evt_mask; ++ /* OUT variables */ ++ uint64_aligned_t buffer_mfn; ++ uint32_t size; ++}; ++typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t); ++ ++/* ++ * Get physical information about the host machine ++ */ ++#define XEN_SYSCTL_physinfo 3 ++ /* (x86) The platform supports HVM guests. */ ++#define _XEN_SYSCTL_PHYSCAP_hvm 0 ++#define XEN_SYSCTL_PHYSCAP_hvm (1u<<_XEN_SYSCTL_PHYSCAP_hvm) ++ /* (x86) The platform supports HVM-guest direct access to I/O devices. */ ++#define _XEN_SYSCTL_PHYSCAP_hvm_directio 1 ++#define XEN_SYSCTL_PHYSCAP_hvm_directio (1u<<_XEN_SYSCTL_PHYSCAP_hvm_directio) ++struct xen_sysctl_physinfo { ++ uint32_t threads_per_core; ++ uint32_t cores_per_socket; ++ uint32_t nr_cpus; ++ uint32_t nr_nodes; ++ uint32_t cpu_khz; ++ uint64_aligned_t total_pages; ++ uint64_aligned_t free_pages; ++ uint64_aligned_t scrub_pages; ++ uint32_t hw_cap[8]; ++ ++ /* ++ * IN: maximum addressable entry in the caller-provided cpu_to_node array. ++ * OUT: largest cpu identifier in the system. ++ * If OUT is greater than IN then the cpu_to_node array is truncated! ++ */ ++ uint32_t max_cpu_id; ++ /* ++ * If not NULL, this array is filled with node identifier for each cpu. 
++ * If a cpu has no node information (e.g., cpu not present) then the ++ * sentinel value ~0u is written. ++ * The size of this array is specified by the caller in @max_cpu_id. ++ * If the actual @max_cpu_id is smaller than the array then the trailing ++ * elements of the array will not be written by the sysctl. ++ */ ++ XEN_GUEST_HANDLE_64(uint32) cpu_to_node; ++ ++ /* XEN_SYSCTL_PHYSCAP_??? */ ++ uint32_t capabilities; ++}; ++typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t); ++ ++/* ++ * Get the ID of the current scheduler. ++ */ ++#define XEN_SYSCTL_sched_id 4 ++struct xen_sysctl_sched_id { ++ /* OUT variable */ ++ uint32_t sched_id; ++}; ++typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t); ++ ++/* Interface for controlling Xen software performance counters. */ ++#define XEN_SYSCTL_perfc_op 5 ++/* Sub-operations: */ ++#define XEN_SYSCTL_PERFCOP_reset 1 /* Reset all counters to zero. */ ++#define XEN_SYSCTL_PERFCOP_query 2 /* Get perfctr information. */ ++struct xen_sysctl_perfc_desc { ++ char name[80]; /* name of perf counter */ ++ uint32_t nr_vals; /* number of values for this counter */ ++}; ++typedef struct xen_sysctl_perfc_desc xen_sysctl_perfc_desc_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t); ++typedef uint32_t xen_sysctl_perfc_val_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t); ++ ++struct xen_sysctl_perfc_op { ++ /* IN variables. */ ++ uint32_t cmd; /* XEN_SYSCTL_PERFCOP_??? */ ++ /* OUT variables. 
*/ ++ uint32_t nr_counters; /* number of counters description */ ++ uint32_t nr_vals; /* number of values */ ++ /* counter information (or NULL) */ ++ XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc; ++ /* counter values (or NULL) */ ++ XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val; ++}; ++typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); ++ ++#define XEN_SYSCTL_getdomaininfolist 6 ++struct xen_sysctl_getdomaininfolist { ++ /* IN variables. */ ++ domid_t first_domain; ++ uint32_t max_domains; ++ XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer; ++ /* OUT variables. */ ++ uint32_t num_domains; ++}; ++typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t); ++ ++/* Inject debug keys into Xen. */ ++#define XEN_SYSCTL_debug_keys 7 ++struct xen_sysctl_debug_keys { ++ /* IN variables. */ ++ XEN_GUEST_HANDLE_64(char) keys; ++ uint32_t nr_keys; ++}; ++typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t); ++ ++/* Get physical CPU information. */ ++#define XEN_SYSCTL_getcpuinfo 8 ++struct xen_sysctl_cpuinfo { ++ uint64_aligned_t idletime; ++}; ++typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); ++struct xen_sysctl_getcpuinfo { ++ /* IN variables. */ ++ uint32_t max_cpus; ++ XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info; ++ /* OUT variables. */ ++ uint32_t nr_cpus; ++}; ++typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); ++ ++#define XEN_SYSCTL_availheap 9 ++struct xen_sysctl_availheap { ++ /* IN variables. */ ++ uint32_t min_bitwidth; /* Smallest address width (zero if don't care). */ ++ uint32_t max_bitwidth; /* Largest address width (zero if don't care). */ ++ int32_t node; /* NUMA node of interest (-1 for all nodes). */ ++ /* OUT variables. 
*/ ++ uint64_aligned_t avail_bytes;/* Bytes available in the specified region. */ ++}; ++typedef struct xen_sysctl_availheap xen_sysctl_availheap_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t); ++ ++#define XEN_SYSCTL_get_pmstat 10 ++struct pm_px_val { ++ uint64_aligned_t freq; /* Px core frequency */ ++ uint64_aligned_t residency; /* Px residency time */ ++ uint64_aligned_t count; /* Px transition count */ ++}; ++typedef struct pm_px_val pm_px_val_t; ++DEFINE_XEN_GUEST_HANDLE(pm_px_val_t); ++ ++struct pm_px_stat { ++ uint8_t total; /* total Px states */ ++ uint8_t usable; /* usable Px states */ ++ uint8_t last; /* last Px state */ ++ uint8_t cur; /* current Px state */ ++ XEN_GUEST_HANDLE_64(uint64) trans_pt; /* Px transition table */ ++ XEN_GUEST_HANDLE_64(pm_px_val_t) pt; ++}; ++typedef struct pm_px_stat pm_px_stat_t; ++DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t); ++ ++struct pm_cx_stat { ++ uint32_t nr; /* entry nr in triggers & residencies, including C0 */ ++ uint32_t last; /* last Cx state */ ++ uint64_aligned_t idle_time; /* idle time from boot */ ++ XEN_GUEST_HANDLE_64(uint64) triggers; /* Cx trigger counts */ ++ XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */ ++}; ++ ++struct xen_sysctl_get_pmstat { ++#define PMSTAT_CATEGORY_MASK 0xf0 ++#define PMSTAT_PX 0x10 ++#define PMSTAT_CX 0x20 ++#define PMSTAT_get_max_px (PMSTAT_PX | 0x1) ++#define PMSTAT_get_pxstat (PMSTAT_PX | 0x2) ++#define PMSTAT_reset_pxstat (PMSTAT_PX | 0x3) ++#define PMSTAT_get_max_cx (PMSTAT_CX | 0x1) ++#define PMSTAT_get_cxstat (PMSTAT_CX | 0x2) ++#define PMSTAT_reset_cxstat (PMSTAT_CX | 0x3) ++ uint32_t type; ++ uint32_t cpuid; ++ union { ++ struct pm_px_stat getpx; ++ struct pm_cx_stat getcx; ++ /* other struct for tx, etc */ ++ } u; ++}; ++typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t); ++ ++#define XEN_SYSCTL_cpu_hotplug 11 ++struct xen_sysctl_cpu_hotplug { ++ /* IN variables */ ++ uint32_t cpu; /* Physical 
cpu. */ ++#define XEN_SYSCTL_CPU_HOTPLUG_ONLINE 0 ++#define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1 ++ uint32_t op; /* hotplug opcode */ ++}; ++typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t); ++ ++ ++struct xen_sysctl { ++ uint32_t cmd; ++ uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ ++ union { ++ struct xen_sysctl_readconsole readconsole; ++ struct xen_sysctl_tbuf_op tbuf_op; ++ struct xen_sysctl_physinfo physinfo; ++ struct xen_sysctl_sched_id sched_id; ++ struct xen_sysctl_perfc_op perfc_op; ++ struct xen_sysctl_getdomaininfolist getdomaininfolist; ++ struct xen_sysctl_debug_keys debug_keys; ++ struct xen_sysctl_getcpuinfo getcpuinfo; ++ struct xen_sysctl_availheap availheap; ++ struct xen_sysctl_get_pmstat get_pmstat; ++ struct xen_sysctl_cpu_hotplug cpu_hotplug; ++ uint8_t pad[128]; ++ } u; ++}; ++typedef struct xen_sysctl xen_sysctl_t; ++DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t); ++ ++#endif /* __XEN_PUBLIC_SYSCTL_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/trace.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/trace.h 2008-11-25 12:22:34.000000000 +0100 +@@ -0,0 +1,205 @@ ++/****************************************************************************** ++ * include/public/trace.h ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to 
the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Mark Williamson, (C) 2004 Intel Research Cambridge ++ * Copyright (C) 2005 Bin Ren ++ */ ++ ++#ifndef __XEN_PUBLIC_TRACE_H__ ++#define __XEN_PUBLIC_TRACE_H__ ++ ++#define TRACE_EXTRA_MAX 7 ++#define TRACE_EXTRA_SHIFT 28 ++ ++/* Trace classes */ ++#define TRC_CLS_SHIFT 16 ++#define TRC_GEN 0x0001f000 /* General trace */ ++#define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */ ++#define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */ ++#define TRC_HVM 0x0008f000 /* Xen HVM trace */ ++#define TRC_MEM 0x0010f000 /* Xen memory trace */ ++#define TRC_PV 0x0020f000 /* Xen PV traces */ ++#define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */ ++#define TRC_PM 0x0080f000 /* Xen power management trace */ ++#define TRC_ALL 0x0ffff000 ++#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff) ++#define TRC_HD_CYCLE_FLAG (1UL<<31) ++#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) ) ++#define TRC_HD_EXTRA(x) (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX) ++ ++/* Trace subclasses */ ++#define TRC_SUBCLS_SHIFT 12 ++ ++/* trace subclasses for SVM */ ++#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */ ++#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ ++ ++#define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */ ++#define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive 
scheduling */ ++ ++/* Trace events per class */ ++#define TRC_LOST_RECORDS (TRC_GEN + 1) ++#define TRC_TRACE_WRAP_BUFFER (TRC_GEN + 2) ++#define TRC_TRACE_CPU_CHANGE (TRC_GEN + 3) ++ ++#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1) ++#define TRC_SCHED_DOM_ADD (TRC_SCHED_VERBOSE + 1) ++#define TRC_SCHED_DOM_REM (TRC_SCHED_VERBOSE + 2) ++#define TRC_SCHED_SLEEP (TRC_SCHED_VERBOSE + 3) ++#define TRC_SCHED_WAKE (TRC_SCHED_VERBOSE + 4) ++#define TRC_SCHED_YIELD (TRC_SCHED_VERBOSE + 5) ++#define TRC_SCHED_BLOCK (TRC_SCHED_VERBOSE + 6) ++#define TRC_SCHED_SHUTDOWN (TRC_SCHED_VERBOSE + 7) ++#define TRC_SCHED_CTL (TRC_SCHED_VERBOSE + 8) ++#define TRC_SCHED_ADJDOM (TRC_SCHED_VERBOSE + 9) ++#define TRC_SCHED_SWITCH (TRC_SCHED_VERBOSE + 10) ++#define TRC_SCHED_S_TIMER_FN (TRC_SCHED_VERBOSE + 11) ++#define TRC_SCHED_T_TIMER_FN (TRC_SCHED_VERBOSE + 12) ++#define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED_VERBOSE + 13) ++#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14) ++#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15) ++ ++#define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1) ++#define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2) ++#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3) ++ ++#define TRC_PV_HYPERCALL (TRC_PV + 1) ++#define TRC_PV_TRAP (TRC_PV + 3) ++#define TRC_PV_PAGE_FAULT (TRC_PV + 4) ++#define TRC_PV_FORCED_INVALID_OP (TRC_PV + 5) ++#define TRC_PV_EMULATE_PRIVOP (TRC_PV + 6) ++#define TRC_PV_EMULATE_4GB (TRC_PV + 7) ++#define TRC_PV_MATH_STATE_RESTORE (TRC_PV + 8) ++#define TRC_PV_PAGING_FIXUP (TRC_PV + 9) ++#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV + 10) ++#define TRC_PV_PTWR_EMULATION (TRC_PV + 11) ++#define TRC_PV_PTWR_EMULATION_PAE (TRC_PV + 12) ++ /* Indicates that addresses in trace record are 64 bits */ ++#define TRC_64_FLAG (0x100) ++ ++#define TRC_SHADOW_NOT_SHADOW (TRC_SHADOW + 1) ++#define TRC_SHADOW_FAST_PROPAGATE (TRC_SHADOW + 2) ++#define TRC_SHADOW_FAST_MMIO (TRC_SHADOW + 3) ++#define TRC_SHADOW_FALSE_FAST_PATH (TRC_SHADOW + 4) ++#define 
TRC_SHADOW_MMIO (TRC_SHADOW + 5) ++#define TRC_SHADOW_FIXUP (TRC_SHADOW + 6) ++#define TRC_SHADOW_DOMF_DYING (TRC_SHADOW + 7) ++#define TRC_SHADOW_EMULATE (TRC_SHADOW + 8) ++#define TRC_SHADOW_EMULATE_UNSHADOW_USER (TRC_SHADOW + 9) ++#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ (TRC_SHADOW + 10) ++#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11) ++#define TRC_SHADOW_WRMAP_BF (TRC_SHADOW + 12) ++#define TRC_SHADOW_PREALLOC_UNPIN (TRC_SHADOW + 13) ++#define TRC_SHADOW_RESYNC_FULL (TRC_SHADOW + 14) ++#define TRC_SHADOW_RESYNC_ONLY (TRC_SHADOW + 15) ++ ++/* trace events per subclass */ ++#define TRC_HVM_VMENTRY (TRC_HVM_ENTRYEXIT + 0x01) ++#define TRC_HVM_VMEXIT (TRC_HVM_ENTRYEXIT + 0x02) ++#define TRC_HVM_VMEXIT64 (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02) ++#define TRC_HVM_PF_XEN (TRC_HVM_HANDLER + 0x01) ++#define TRC_HVM_PF_XEN64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x01) ++#define TRC_HVM_PF_INJECT (TRC_HVM_HANDLER + 0x02) ++#define TRC_HVM_PF_INJECT64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x02) ++#define TRC_HVM_INJ_EXC (TRC_HVM_HANDLER + 0x03) ++#define TRC_HVM_INJ_VIRQ (TRC_HVM_HANDLER + 0x04) ++#define TRC_HVM_REINJ_VIRQ (TRC_HVM_HANDLER + 0x05) ++#define TRC_HVM_IO_READ (TRC_HVM_HANDLER + 0x06) ++#define TRC_HVM_IO_WRITE (TRC_HVM_HANDLER + 0x07) ++#define TRC_HVM_CR_READ (TRC_HVM_HANDLER + 0x08) ++#define TRC_HVM_CR_READ64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x08) ++#define TRC_HVM_CR_WRITE (TRC_HVM_HANDLER + 0x09) ++#define TRC_HVM_CR_WRITE64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x09) ++#define TRC_HVM_DR_READ (TRC_HVM_HANDLER + 0x0A) ++#define TRC_HVM_DR_WRITE (TRC_HVM_HANDLER + 0x0B) ++#define TRC_HVM_MSR_READ (TRC_HVM_HANDLER + 0x0C) ++#define TRC_HVM_MSR_WRITE (TRC_HVM_HANDLER + 0x0D) ++#define TRC_HVM_CPUID (TRC_HVM_HANDLER + 0x0E) ++#define TRC_HVM_INTR (TRC_HVM_HANDLER + 0x0F) ++#define TRC_HVM_NMI (TRC_HVM_HANDLER + 0x10) ++#define TRC_HVM_SMI (TRC_HVM_HANDLER + 0x11) ++#define TRC_HVM_VMMCALL (TRC_HVM_HANDLER + 0x12) ++#define TRC_HVM_HLT (TRC_HVM_HANDLER 
+ 0x13) ++#define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14) ++#define TRC_HVM_INVLPG64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14) ++#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15) ++#define TRC_HVM_IO_ASSIST (TRC_HVM_HANDLER + 0x16) ++#define TRC_HVM_IO_ASSIST64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x16) ++#define TRC_HVM_MMIO_ASSIST (TRC_HVM_HANDLER + 0x17) ++#define TRC_HVM_MMIO_ASSIST64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x17) ++#define TRC_HVM_CLTS (TRC_HVM_HANDLER + 0x18) ++#define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19) ++#define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19) ++ ++/* trace subclasses for power management */ ++#define TRC_PM_FREQ 0x00801000 /* xen cpu freq events */ ++#define TRC_PM_IDLE 0x00802000 /* xen cpu idle events */ ++ ++/* trace events for per class */ ++#define TRC_PM_FREQ_CHANGE (TRC_PM_FREQ + 0x01) ++#define TRC_PM_IDLE_ENTRY (TRC_PM_IDLE + 0x01) ++#define TRC_PM_IDLE_EXIT (TRC_PM_IDLE + 0x02) ++ ++/* This structure represents a single trace buffer record. */ ++struct t_rec { ++ uint32_t event:28; ++ uint32_t extra_u32:3; /* # entries in trailing extra_u32[] array */ ++ uint32_t cycles_included:1; /* u.cycles or u.no_cycles? */ ++ union { ++ struct { ++ uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */ ++ uint32_t extra_u32[7]; /* event data items */ ++ } cycles; ++ struct { ++ uint32_t extra_u32[7]; /* event data items */ ++ } nocycles; ++ } u; ++}; ++ ++/* ++ * This structure contains the metadata for a single trace buffer. The head ++ * field, indexes into an array of struct t_rec's. ++ */ ++struct t_buf { ++ /* Assume the data buffer size is X. X is generally not a power of 2. ++ * CONS and PROD are incremented modulo (2*X): ++ * 0 <= cons < 2*X ++ * 0 <= prod < 2*X ++ * This is done because addition modulo X breaks at 2^32 when X is not a ++ * power of 2: ++ * (((2^32 - 1) % X) + 1) % X != (2^32) % X ++ */ ++ uint32_t cons; /* Offset of next item to be consumed by control tools. 
*/ ++ uint32_t prod; /* Offset of next item to be produced by Xen. */ ++ /* Records follow immediately after the meta-data header. */ ++}; ++ ++#endif /* __XEN_PUBLIC_TRACE_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/xen-compat.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/xen-compat.h 2008-09-01 12:07:31.000000000 +0200 +@@ -0,0 +1,44 @@ ++/****************************************************************************** ++ * xen-compat.h ++ * ++ * Guest OS interface to Xen. Compatibility layer. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. 
++ * ++ * Copyright (c) 2006, Christian Limpach ++ */ ++ ++#ifndef __XEN_PUBLIC_XEN_COMPAT_H__ ++#define __XEN_PUBLIC_XEN_COMPAT_H__ ++ ++#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030209 ++ ++#if defined(__XEN__) || defined(__XEN_TOOLS__) ++/* Xen is built with matching headers and implements the latest interface. */ ++#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__ ++#elif !defined(__XEN_INTERFACE_VERSION__) ++/* Guests which do not specify a version get the legacy interface. */ ++#define __XEN_INTERFACE_VERSION__ 0x00000000 ++#endif ++ ++#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__ ++#error "These header files do not support the requested interface version." ++#endif ++ ++#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */ +Index: head-2008-11-25/include/xen/interface/xenoprof.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/xenoprof.h 2007-06-12 13:14:19.000000000 +0200 +@@ -0,0 +1,138 @@ ++/****************************************************************************** ++ * xenoprof.h ++ * ++ * Interface for enabling system wide profiling based on hardware performance ++ * counters ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * Written by Aravind Menon & Jose Renato Santos ++ */ ++ ++#ifndef __XEN_PUBLIC_XENOPROF_H__ ++#define __XEN_PUBLIC_XENOPROF_H__ ++ ++#include "xen.h" ++ ++/* ++ * Commands to HYPERVISOR_xenoprof_op(). ++ */ ++#define XENOPROF_init 0 ++#define XENOPROF_reset_active_list 1 ++#define XENOPROF_reset_passive_list 2 ++#define XENOPROF_set_active 3 ++#define XENOPROF_set_passive 4 ++#define XENOPROF_reserve_counters 5 ++#define XENOPROF_counter 6 ++#define XENOPROF_setup_events 7 ++#define XENOPROF_enable_virq 8 ++#define XENOPROF_start 9 ++#define XENOPROF_stop 10 ++#define XENOPROF_disable_virq 11 ++#define XENOPROF_release_counters 12 ++#define XENOPROF_shutdown 13 ++#define XENOPROF_get_buffer 14 ++#define XENOPROF_set_backtrace 15 ++#define XENOPROF_last_op 15 ++ ++#define MAX_OPROF_EVENTS 32 ++#define MAX_OPROF_DOMAINS 25 ++#define XENOPROF_CPU_TYPE_SIZE 64 ++ ++/* Xenoprof performance events (not Xen events) */ ++struct event_log { ++ uint64_t eip; ++ uint8_t mode; ++ uint8_t event; ++}; ++ ++/* PC value that indicates a special code */ ++#define XENOPROF_ESCAPE_CODE ~0UL ++/* Transient events for the xenoprof->oprofile cpu buf */ ++#define XENOPROF_TRACE_BEGIN 1 ++ ++/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */ ++struct xenoprof_buf { ++ uint32_t event_head; ++ uint32_t event_tail; ++ uint32_t event_size; ++ uint32_t vcpu_id; ++ uint64_t xen_samples; ++ uint64_t kernel_samples; ++ uint64_t 
user_samples; ++ uint64_t lost_samples; ++ struct event_log event_log[1]; ++}; ++#ifndef __XEN__ ++typedef struct xenoprof_buf xenoprof_buf_t; ++DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t); ++#endif ++ ++struct xenoprof_init { ++ int32_t num_events; ++ int32_t is_primary; ++ char cpu_type[XENOPROF_CPU_TYPE_SIZE]; ++}; ++typedef struct xenoprof_init xenoprof_init_t; ++DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t); ++ ++struct xenoprof_get_buffer { ++ int32_t max_samples; ++ int32_t nbuf; ++ int32_t bufsize; ++ uint64_t buf_gmaddr; ++}; ++typedef struct xenoprof_get_buffer xenoprof_get_buffer_t; ++DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t); ++ ++struct xenoprof_counter { ++ uint32_t ind; ++ uint64_t count; ++ uint32_t enabled; ++ uint32_t event; ++ uint32_t hypervisor; ++ uint32_t kernel; ++ uint32_t user; ++ uint64_t unit_mask; ++}; ++typedef struct xenoprof_counter xenoprof_counter_t; ++DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t); ++ ++typedef struct xenoprof_passive { ++ uint16_t domain_id; ++ int32_t max_samples; ++ int32_t nbuf; ++ int32_t bufsize; ++ uint64_t buf_gmaddr; ++} xenoprof_passive_t; ++DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t); ++ ++ ++#endif /* __XEN_PUBLIC_XENOPROF_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/xsm/acm.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/xsm/acm.h 2008-07-21 11:00:33.000000000 +0200 +@@ -0,0 +1,235 @@ ++/* ++ * acm.h: Xen access control module interface defintions ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, 
distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Reiner Sailer ++ * Copyright (c) 2005, International Business Machines Corporation. ++ */ ++ ++#ifndef _XEN_PUBLIC_ACM_H ++#define _XEN_PUBLIC_ACM_H ++ ++#include "../xen.h" ++ ++/* if ACM_DEBUG defined, all hooks should ++ * print a short trace message (comment it out ++ * when not in testing mode ) ++ */ ++/* #define ACM_DEBUG */ ++ ++#ifdef ACM_DEBUG ++# define printkd(fmt, args...) printk(fmt,## args) ++#else ++# define printkd(fmt, args...) ++#endif ++ ++/* default ssid reference value if not supplied */ ++#define ACM_DEFAULT_SSID 0x0 ++#define ACM_DEFAULT_LOCAL_SSID 0x0 ++ ++/* Internal ACM ERROR types */ ++#define ACM_OK 0 ++#define ACM_UNDEF -1 ++#define ACM_INIT_SSID_ERROR -2 ++#define ACM_INIT_SOID_ERROR -3 ++#define ACM_ERROR -4 ++ ++/* External ACCESS DECISIONS */ ++#define ACM_ACCESS_PERMITTED 0 ++#define ACM_ACCESS_DENIED -111 ++#define ACM_NULL_POINTER_ERROR -200 ++ ++/* ++ Error codes reported in when trying to test for a new policy ++ These error codes are reported in an array of tuples where ++ each error code is followed by a parameter describing the error ++ more closely, such as a domain id. 
++*/ ++#define ACM_EVTCHN_SHARING_VIOLATION 0x100 ++#define ACM_GNTTAB_SHARING_VIOLATION 0x101 ++#define ACM_DOMAIN_LOOKUP 0x102 ++#define ACM_CHWALL_CONFLICT 0x103 ++#define ACM_SSIDREF_IN_USE 0x104 ++ ++ ++/* primary policy in lower 4 bits */ ++#define ACM_NULL_POLICY 0 ++#define ACM_CHINESE_WALL_POLICY 1 ++#define ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY 2 ++#define ACM_POLICY_UNDEFINED 15 ++ ++/* combinations have secondary policy component in higher 4bit */ ++#define ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY \ ++ ((ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY << 4) | ACM_CHINESE_WALL_POLICY) ++ ++/* policy: */ ++#define ACM_POLICY_NAME(X) \ ++ ((X) == (ACM_NULL_POLICY)) ? "NULL" : \ ++ ((X) == (ACM_CHINESE_WALL_POLICY)) ? "CHINESE WALL" : \ ++ ((X) == (ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "SIMPLE TYPE ENFORCEMENT" : \ ++ ((X) == (ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "CHINESE WALL AND SIMPLE TYPE ENFORCEMENT" : \ ++ "UNDEFINED" ++ ++/* the following policy versions must be increased ++ * whenever the interpretation of the related ++ * policy's data structure changes ++ */ ++#define ACM_POLICY_VERSION 4 ++#define ACM_CHWALL_VERSION 1 ++#define ACM_STE_VERSION 1 ++ ++/* defines a ssid reference used by xen */ ++typedef uint32_t ssidref_t; ++ ++/* hooks that are known to domains */ ++#define ACMHOOK_none 0 ++#define ACMHOOK_sharing 1 ++#define ACMHOOK_authorization 2 ++#define ACMHOOK_conflictset 3 ++ ++/* -------security policy relevant type definitions-------- */ ++ ++/* type identifier; compares to "equal" or "not equal" */ ++typedef uint16_t domaintype_t; ++ ++/* CHINESE WALL POLICY DATA STRUCTURES ++ * ++ * current accumulated conflict type set: ++ * When a domain is started and has a type that is in ++ * a conflict set, the conflicting types are incremented in ++ * the aggregate set. When a domain is destroyed, the ++ * conflicting types to its type are decremented. 
++ * If a domain has multiple types, this procedure works over ++ * all those types. ++ * ++ * conflict_aggregate_set[i] holds the number of ++ * running domains that have a conflict with type i. ++ * ++ * running_types[i] holds the number of running domains ++ * that include type i in their ssidref-referenced type set ++ * ++ * conflict_sets[i][j] is "0" if type j has no conflict ++ * with type i and is "1" otherwise. ++ */ ++/* high-16 = version, low-16 = check magic */ ++#define ACM_MAGIC 0x0001debc ++ ++/* size of the SHA1 hash identifying the XML policy from which the ++ binary policy was created */ ++#define ACM_SHA1_HASH_SIZE 20 ++ ++/* each offset in bytes from start of the struct they ++ * are part of */ ++ ++/* V3 of the policy buffer aded a version structure */ ++struct acm_policy_version ++{ ++ uint32_t major; ++ uint32_t minor; ++}; ++ ++ ++/* each buffer consists of all policy information for ++ * the respective policy given in the policy code ++ * ++ * acm_policy_buffer, acm_chwall_policy_buffer, ++ * and acm_ste_policy_buffer need to stay 32-bit aligned ++ * because we create binary policies also with external ++ * tools that assume packed representations (e.g. 
the java tool) ++ */ ++struct acm_policy_buffer { ++ uint32_t magic; ++ uint32_t policy_version; /* ACM_POLICY_VERSION */ ++ uint32_t len; ++ uint32_t policy_reference_offset; ++ uint32_t primary_policy_code; ++ uint32_t primary_buffer_offset; ++ uint32_t secondary_policy_code; ++ uint32_t secondary_buffer_offset; ++ struct acm_policy_version xml_pol_version; /* add in V3 */ ++ uint8_t xml_policy_hash[ACM_SHA1_HASH_SIZE]; /* added in V4 */ ++}; ++ ++ ++struct acm_policy_reference_buffer { ++ uint32_t len; ++}; ++ ++struct acm_chwall_policy_buffer { ++ uint32_t policy_version; /* ACM_CHWALL_VERSION */ ++ uint32_t policy_code; ++ uint32_t chwall_max_types; ++ uint32_t chwall_max_ssidrefs; ++ uint32_t chwall_max_conflictsets; ++ uint32_t chwall_ssid_offset; ++ uint32_t chwall_conflict_sets_offset; ++ uint32_t chwall_running_types_offset; ++ uint32_t chwall_conflict_aggregate_offset; ++}; ++ ++struct acm_ste_policy_buffer { ++ uint32_t policy_version; /* ACM_STE_VERSION */ ++ uint32_t policy_code; ++ uint32_t ste_max_types; ++ uint32_t ste_max_ssidrefs; ++ uint32_t ste_ssid_offset; ++}; ++ ++struct acm_stats_buffer { ++ uint32_t magic; ++ uint32_t len; ++ uint32_t primary_policy_code; ++ uint32_t primary_stats_offset; ++ uint32_t secondary_policy_code; ++ uint32_t secondary_stats_offset; ++}; ++ ++struct acm_ste_stats_buffer { ++ uint32_t ec_eval_count; ++ uint32_t gt_eval_count; ++ uint32_t ec_denied_count; ++ uint32_t gt_denied_count; ++ uint32_t ec_cachehit_count; ++ uint32_t gt_cachehit_count; ++}; ++ ++struct acm_ssid_buffer { ++ uint32_t len; ++ ssidref_t ssidref; ++ uint32_t policy_reference_offset; ++ uint32_t primary_policy_code; ++ uint32_t primary_max_types; ++ uint32_t primary_types_offset; ++ uint32_t secondary_policy_code; ++ uint32_t secondary_max_types; ++ uint32_t secondary_types_offset; ++}; ++ ++#endif ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ 
+Index: head-2008-11-25/include/xen/interface/xsm/acm_ops.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/xsm/acm_ops.h 2007-10-22 13:39:15.000000000 +0200 +@@ -0,0 +1,159 @@ ++/* ++ * acm_ops.h: Xen access control module hypervisor commands ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ * ++ * Reiner Sailer ++ * Copyright (c) 2005,2006 International Business Machines Corporation. ++ */ ++ ++#ifndef __XEN_PUBLIC_ACM_OPS_H__ ++#define __XEN_PUBLIC_ACM_OPS_H__ ++ ++#include "../xen.h" ++#include "acm.h" ++ ++/* ++ * Make sure you increment the interface version whenever you modify this file! ++ * This makes sure that old versions of acm tools will stop working in a ++ * well-defined way (rather than crashing the machine, for instance). 
++ */ ++#define ACM_INTERFACE_VERSION 0xAAAA000A ++ ++/************************************************************************/ ++ ++/* ++ * Prototype for this hypercall is: ++ * int acm_op(int cmd, void *args) ++ * @cmd == ACMOP_??? (access control module operation). ++ * @args == Operation-specific extra arguments (NULL if none). ++ */ ++ ++ ++#define ACMOP_setpolicy 1 ++struct acm_setpolicy { ++ /* IN */ ++ XEN_GUEST_HANDLE_64(void) pushcache; ++ uint32_t pushcache_size; ++}; ++ ++ ++#define ACMOP_getpolicy 2 ++struct acm_getpolicy { ++ /* IN */ ++ XEN_GUEST_HANDLE_64(void) pullcache; ++ uint32_t pullcache_size; ++}; ++ ++ ++#define ACMOP_dumpstats 3 ++struct acm_dumpstats { ++ /* IN */ ++ XEN_GUEST_HANDLE_64(void) pullcache; ++ uint32_t pullcache_size; ++}; ++ ++ ++#define ACMOP_getssid 4 ++#define ACM_GETBY_ssidref 1 ++#define ACM_GETBY_domainid 2 ++struct acm_getssid { ++ /* IN */ ++ uint32_t get_ssid_by; /* ACM_GETBY_* */ ++ union { ++ domaintype_t domainid; ++ ssidref_t ssidref; ++ } id; ++ XEN_GUEST_HANDLE_64(void) ssidbuf; ++ uint32_t ssidbuf_size; ++}; ++ ++#define ACMOP_getdecision 5 ++struct acm_getdecision { ++ /* IN */ ++ uint32_t get_decision_by1; /* ACM_GETBY_* */ ++ uint32_t get_decision_by2; /* ACM_GETBY_* */ ++ union { ++ domaintype_t domainid; ++ ssidref_t ssidref; ++ } id1; ++ union { ++ domaintype_t domainid; ++ ssidref_t ssidref; ++ } id2; ++ uint32_t hook; ++ /* OUT */ ++ uint32_t acm_decision; ++}; ++ ++ ++#define ACMOP_chgpolicy 6 ++struct acm_change_policy { ++ /* IN */ ++ XEN_GUEST_HANDLE_64(void) policy_pushcache; ++ uint32_t policy_pushcache_size; ++ XEN_GUEST_HANDLE_64(void) del_array; ++ uint32_t delarray_size; ++ XEN_GUEST_HANDLE_64(void) chg_array; ++ uint32_t chgarray_size; ++ /* OUT */ ++ /* array with error code */ ++ XEN_GUEST_HANDLE_64(void) err_array; ++ uint32_t errarray_size; ++}; ++ ++#define ACMOP_relabeldoms 7 ++struct acm_relabel_doms { ++ /* IN */ ++ XEN_GUEST_HANDLE_64(void) relabel_map; ++ uint32_t 
relabel_map_size; ++ /* OUT */ ++ XEN_GUEST_HANDLE_64(void) err_array; ++ uint32_t errarray_size; ++}; ++ ++/* future interface to Xen */ ++struct xen_acmctl { ++ uint32_t cmd; ++ uint32_t interface_version; ++ union { ++ struct acm_setpolicy setpolicy; ++ struct acm_getpolicy getpolicy; ++ struct acm_dumpstats dumpstats; ++ struct acm_getssid getssid; ++ struct acm_getdecision getdecision; ++ struct acm_change_policy change_policy; ++ struct acm_relabel_doms relabel_doms; ++ } u; ++}; ++ ++typedef struct xen_acmctl xen_acmctl_t; ++DEFINE_XEN_GUEST_HANDLE(xen_acmctl_t); ++ ++#endif /* __XEN_PUBLIC_ACM_OPS_H__ */ ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-set-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +Index: head-2008-11-25/include/xen/interface/xsm/flask_op.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/include/xen/interface/xsm/flask_op.h 2008-09-01 12:07:31.000000000 +0200 +@@ -0,0 +1,45 @@ ++/* ++ * This file contains the flask_op hypercall commands and definitions. ++ * ++ * Author: George Coker, ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2, ++ * as published by the Free Software Foundation. 
++ */ ++ ++#ifndef __FLASK_OP_H__ ++#define __FLASK_OP_H__ ++ ++#define FLASK_LOAD 1 ++#define FLASK_GETENFORCE 2 ++#define FLASK_SETENFORCE 3 ++#define FLASK_CONTEXT_TO_SID 4 ++#define FLASK_SID_TO_CONTEXT 5 ++#define FLASK_ACCESS 6 ++#define FLASK_CREATE 7 ++#define FLASK_RELABEL 8 ++#define FLASK_USER 9 ++#define FLASK_POLICYVERS 10 ++#define FLASK_GETBOOL 11 ++#define FLASK_SETBOOL 12 ++#define FLASK_COMMITBOOLS 13 ++#define FLASK_MLS 14 ++#define FLASK_DISABLE 15 ++#define FLASK_GETAVC_THRESHOLD 16 ++#define FLASK_SETAVC_THRESHOLD 17 ++#define FLASK_AVC_HASHSTATS 18 ++#define FLASK_AVC_CACHESTATS 19 ++#define FLASK_MEMBER 20 ++ ++#define FLASK_LAST FLASK_MEMBER ++ ++typedef struct flask_op { ++ uint32_t cmd; ++ uint32_t size; ++ char *buf; ++} flask_op_t; ++ ++DEFINE_XEN_GUEST_HANDLE(flask_op_t); ++ ++#endif diff --git a/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-xen-arch.diff b/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-xen-arch.diff new file mode 100644 index 0000000000..4d55e4a5fe --- /dev/null +++ b/src/patches/suse-2.6.27.25/patches.xen/xen3-auto-xen-arch.diff @@ -0,0 +1,46515 @@ +Subject: xen3 xen-arch +From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd) +Patch-mainline: obsolete +Acked-by: jbeulich@novell.com + +List of files having Xen derivates (perhaps created during the merging +of newer kernel versions), for xen-port-patches.py to pick up (i.e. 
this +must be retained here until the XenSource tree has these in the right +places): ++++ linux/arch/x86/kernel/acpi/sleep-xen.c ++++ linux/arch/x86/kernel/cpu/common_64-xen.c ++++ linux/arch/x86/kernel/e820-xen.c ++++ linux/arch/x86/kernel/head-xen.c ++++ linux/arch/x86/kernel/head32-xen.c ++++ linux/arch/x86/kernel/ioport-xen.c ++++ linux/arch/x86/kernel/ipi-xen.c ++++ linux/arch/x86/kernel/ldt-xen.c ++++ linux/arch/x86/kernel/mpparse-xen.c ++++ linux/arch/x86/kernel/pci-nommu-xen.c ++++ linux/arch/x86/kernel/process-xen.c ++++ linux/arch/x86/kernel/setup-xen.c ++++ linux/arch/x86/kernel/setup_percpu-xen.c ++++ linux/arch/x86/kernel/smp-xen.c ++++ linux/arch/x86/mm/fault-xen.c ++++ linux/arch/x86/mm/ioremap-xen.c ++++ linux/arch/x86/mm/pageattr-xen.c ++++ linux/arch/x86/mm/pat-xen.c ++++ linux/arch/x86/mm/pgtable-xen.c ++++ linux/arch/x86/vdso/vdso32-setup-xen.c ++++ linux/drivers/char/mem-xen.c ++++ linux/include/asm-x86/mach-xen/asm/desc.h ++++ linux/include/asm-x86/mach-xen/asm/dma-mapping.h ++++ linux/include/asm-x86/mach-xen/asm/fixmap.h ++++ linux/include/asm-x86/mach-xen/asm/io.h ++++ linux/include/asm-x86/mach-xen/asm/irq_vectors.h ++++ linux/include/asm-x86/mach-xen/asm/irqflags.h ++++ linux/include/asm-x86/mach-xen/asm/mmu_context.h ++++ linux/include/asm-x86/mach-xen/asm/page.h ++++ linux/include/asm-x86/mach-xen/asm/pci.h ++++ linux/include/asm-x86/mach-xen/asm/pgalloc.h ++++ linux/include/asm-x86/mach-xen/asm/pgtable.h ++++ linux/include/asm-x86/mach-xen/asm/processor.h ++++ linux/include/asm-x86/mach-xen/asm/segment.h ++++ linux/include/asm-x86/mach-xen/asm/smp.h ++++ linux/include/asm-x86/mach-xen/asm/spinlock.h ++++ linux/include/asm-x86/mach-xen/asm/swiotlb.h ++++ linux/include/asm-x86/mach-xen/asm/system.h ++++ linux/include/asm-x86/mach-xen/asm/tlbflush.h ++++ linux/include/asm-x86/mach-xen/asm/xor.h + +List of files folded into their native counterparts (and hence removed +from this patch for xen-port-patches.py to not needlessly pick them 
up; +for reference, prefixed with the version the removal occured): +2.6.18/include/asm-x86/mach-xen/asm/pgtable-2level.h +2.6.18/include/asm-x86/mach-xen/asm/pgtable-2level-defs.h +2.6.19/include/asm-x86/mach-xen/asm/ptrace.h +2.6.23/arch/x86/kernel/vsyscall-note_32-xen.S +2.6.23/include/asm-x86/mach-xen/asm/ptrace_64.h +2.6.24/arch/x86/kernel/early_printk_32-xen.c +2.6.24/include/asm-x86/mach-xen/asm/arch_hooks_64.h +2.6.24/include/asm-x86/mach-xen/asm/bootsetup_64.h +2.6.24/include/asm-x86/mach-xen/asm/mmu_32.h +2.6.24/include/asm-x86/mach-xen/asm/mmu_64.h +2.6.24/include/asm-x86/mach-xen/asm/nmi_64.h +2.6.24/include/asm-x86/mach-xen/asm/setup.h +2.6.24/include/asm-x86/mach-xen/asm/time_64.h (added in 2.6.20) +2.6.24/include/asm-x86/mach-xen/mach_timer.h +2.6.25/arch/x86/ia32/syscall32-xen.c +2.6.25/arch/x86/ia32/syscall32_syscall-xen.S +2.6.25/arch/x86/ia32/vsyscall-int80.S +2.6.25/arch/x86/kernel/acpi/boot-xen.c +2.6.25/include/asm-x86/mach-xen/asm/msr.h +2.6.25/include/asm-x86/mach-xen/asm/page_32.h +2.6.25/include/asm-x86/mach-xen/asm/spinlock_32.h +2.6.25/include/asm-x86/mach-xen/asm/timer.h (added in 2.6.24) +2.6.25/include/asm-x86/mach-xen/asm/timer_64.h +2.6.25/include/asm-x86/mach-xen/mach_time.h +2.6.26/arch/x86/kernel/pci-dma_32-xen.c +2.6.26/arch/x86/kernel/pci-swiotlb_64-xen.c +2.6.26/include/asm-x86/mach-xen/asm/dma-mapping_32.h +2.6.26/include/asm-x86/mach-xen/asm/dma-mapping_64.h +2.6.26/include/asm-x86/mach-xen/asm/nmi.h (added in 2.6.24) +2.6.26/include/asm-x86/mach-xen/asm/scatterlist.h (added in 2.6.24) +2.6.26/include/asm-x86/mach-xen/asm/scatterlist_32.h +2.6.26/include/xen/xencomm.h +2.6.27/arch/x86/kernel/e820_32-xen.c +2.6.27/include/asm-x86/mach-xen/asm/e820.h (added in 2.6.24) +2.6.27/include/asm-x86/mach-xen/asm/e820_64.h +2.6.27/include/asm-x86/mach-xen/asm/hw_irq.h (added in 2.6.24) +2.6.27/include/asm-x86/mach-xen/asm/hw_irq_32.h +2.6.27/include/asm-x86/mach-xen/asm/hw_irq_64.h +2.6.27/include/asm-x86/mach-xen/asm/io_32.h 
+2.6.27/include/asm-x86/mach-xen/asm/io_64.h +2.6.27/include/asm-x86/mach-xen/asm/irq.h (added in 2.6.24) +2.6.27/include/asm-x86/mach-xen/asm/irq_64.h +2.6.27.8/include/asm-x86/mach-xen/asm/pci_64.h + +Index: head-2008-11-25/arch/x86/kernel/acpi/processor_extcntl_xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/acpi/processor_extcntl_xen.c 2008-10-01 15:43:24.000000000 +0200 +@@ -0,0 +1,209 @@ ++/* ++ * processor_extcntl_xen.c - interface to notify Xen ++ * ++ * Copyright (C) 2008, Intel corporation ++ * ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at ++ * your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, write to the Free Software Foundation, Inc., ++ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++static int xen_cx_notifier(struct acpi_processor *pr, int action) ++{ ++ int ret, count = 0, i; ++ xen_platform_op_t op = { ++ .cmd = XENPF_set_processor_pminfo, ++ .interface_version = XENPF_INTERFACE_VERSION, ++ .u.set_pminfo.id = pr->acpi_id, ++ .u.set_pminfo.type = XEN_PM_CX, ++ }; ++ struct xen_processor_cx *data, *buf; ++ struct acpi_processor_cx *cx; ++ ++ if (action == PROCESSOR_PM_CHANGE) ++ return -EINVAL; ++ ++ /* Convert to Xen defined structure and hypercall */ ++ buf = kzalloc(pr->power.count * sizeof(struct xen_processor_cx), ++ GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; ++ ++ data = buf; ++ for (i = 1; i <= pr->power.count; i++) { ++ cx = &pr->power.states[i]; ++ /* Skip invalid cstate entry */ ++ if (!cx->valid) ++ continue; ++ ++ data->type = cx->type; ++ data->latency = cx->latency; ++ data->power = cx->power; ++ data->reg.space_id = cx->reg.space_id; ++ data->reg.bit_width = cx->reg.bit_width; ++ data->reg.bit_offset = cx->reg.bit_offset; ++ data->reg.access_size = cx->reg.reserved; ++ data->reg.address = cx->reg.address; ++ ++ /* Get dependency relationships */ ++ if (cx->csd_count) { ++ printk("Wow! _CSD is found. 
Not support for now!\n"); ++ kfree(buf); ++ return -EINVAL; ++ } else { ++ data->dpcnt = 0; ++ set_xen_guest_handle(data->dp, NULL); ++ } ++ ++ data++; ++ count++; ++ } ++ ++ if (!count) { ++ printk("No available Cx info for cpu %d\n", pr->acpi_id); ++ kfree(buf); ++ return -EINVAL; ++ } ++ ++ op.u.set_pminfo.power.count = count; ++ op.u.set_pminfo.power.flags.bm_control = pr->flags.bm_control; ++ op.u.set_pminfo.power.flags.bm_check = pr->flags.bm_check; ++ op.u.set_pminfo.power.flags.has_cst = pr->flags.has_cst; ++ op.u.set_pminfo.power.flags.power_setup_done = pr->flags.power_setup_done; ++ ++ set_xen_guest_handle(op.u.set_pminfo.power.states, buf); ++ ret = HYPERVISOR_platform_op(&op); ++ kfree(buf); ++ return ret; ++} ++ ++static int xen_px_notifier(struct acpi_processor *pr, int action) ++{ ++ int ret = -EINVAL; ++ xen_platform_op_t op = { ++ .cmd = XENPF_set_processor_pminfo, ++ .interface_version = XENPF_INTERFACE_VERSION, ++ .u.set_pminfo.id = pr->acpi_id, ++ .u.set_pminfo.type = XEN_PM_PX, ++ }; ++ struct xen_processor_performance *perf; ++ struct xen_processor_px *states = NULL; ++ struct acpi_processor_performance *px; ++ struct acpi_psd_package *pdomain; ++ ++ if (!pr) ++ return -EINVAL; ++ ++ perf = &op.u.set_pminfo.perf; ++ px = pr->performance; ++ ++ switch(action) { ++ case PROCESSOR_PM_CHANGE: ++ /* ppc dynamic handle */ ++ perf->flags = XEN_PX_PPC; ++ perf->platform_limit = pr->performance_platform_limit; ++ ++ ret = HYPERVISOR_platform_op(&op); ++ break; ++ ++ case PROCESSOR_PM_INIT: ++ /* px normal init */ ++ perf->flags = XEN_PX_PPC | ++ XEN_PX_PCT | ++ XEN_PX_PSS | ++ XEN_PX_PSD; ++ ++ /* ppc */ ++ perf->platform_limit = pr->performance_platform_limit; ++ ++ /* pct */ ++ xen_convert_pct_reg(&perf->control_register, &px->control_register); ++ xen_convert_pct_reg(&perf->status_register, &px->status_register); ++ ++ /* pss */ ++ perf->state_count = px->state_count; ++ states = kzalloc(px->state_count*sizeof(xen_processor_px_t),GFP_KERNEL); ++ if 
(!states) ++ return -ENOMEM; ++ xen_convert_pss_states(states, px->states, px->state_count); ++ set_xen_guest_handle(perf->states, states); ++ ++ /* psd */ ++ pdomain = &px->domain_info; ++ xen_convert_psd_pack(&perf->domain_info, pdomain); ++ if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL) ++ perf->shared_type = CPUFREQ_SHARED_TYPE_ALL; ++ else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) ++ perf->shared_type = CPUFREQ_SHARED_TYPE_ANY; ++ else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL) ++ perf->shared_type = CPUFREQ_SHARED_TYPE_HW; ++ else { ++ ret = -ENODEV; ++ kfree(states); ++ break; ++ } ++ ++ ret = HYPERVISOR_platform_op(&op); ++ kfree(states); ++ break; ++ ++ default: ++ break; ++ } ++ ++ return ret; ++} ++ ++static int xen_tx_notifier(struct acpi_processor *pr, int action) ++{ ++ return -EINVAL; ++} ++static int xen_hotplug_notifier(struct acpi_processor *pr, int event) ++{ ++ return -EINVAL; ++} ++ ++static struct processor_extcntl_ops xen_extcntl_ops = { ++ .hotplug = xen_hotplug_notifier, ++}; ++ ++void arch_acpi_processor_init_extcntl(const struct processor_extcntl_ops **ops) ++{ ++ unsigned int pmbits = (xen_start_info->flags & SIF_PM_MASK) >> 8; ++ ++ if (!pmbits) ++ return; ++ if (pmbits & XEN_PROCESSOR_PM_CX) ++ xen_extcntl_ops.pm_ops[PM_TYPE_IDLE] = xen_cx_notifier; ++ if (pmbits & XEN_PROCESSOR_PM_PX) ++ xen_extcntl_ops.pm_ops[PM_TYPE_PERF] = xen_px_notifier; ++ if (pmbits & XEN_PROCESSOR_PM_TX) ++ xen_extcntl_ops.pm_ops[PM_TYPE_THR] = xen_tx_notifier; ++ ++ *ops = &xen_extcntl_ops; ++} ++EXPORT_SYMBOL(arch_acpi_processor_init_extcntl); +Index: head-2008-11-25/arch/x86/kernel/acpi/sleep_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/acpi/sleep_32-xen.c 2008-04-15 09:29:41.000000000 +0200 +@@ -0,0 +1,113 @@ ++/* ++ * sleep.c - x86-specific ACPI sleep support. 
++ * ++ * Copyright (C) 2001-2003 Patrick Mochel ++ * Copyright (C) 2001-2003 Pavel Machek ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#ifndef CONFIG_ACPI_PV_SLEEP ++/* address in low memory of the wakeup routine. */ ++unsigned long acpi_wakeup_address = 0; ++unsigned long acpi_video_flags; ++extern char wakeup_start, wakeup_end; ++ ++extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); ++#endif ++ ++/** ++ * acpi_save_state_mem - save kernel state ++ * ++ * Create an identity mapped page table and copy the wakeup routine to ++ * low memory. ++ */ ++int acpi_save_state_mem(void) ++{ ++#ifndef CONFIG_ACPI_PV_SLEEP ++ if (!acpi_wakeup_address) ++ return 1; ++ memcpy((void *)acpi_wakeup_address, &wakeup_start, ++ &wakeup_end - &wakeup_start); ++ acpi_copy_wakeup_routine(acpi_wakeup_address); ++#endif ++ return 0; ++} ++ ++/* ++ * acpi_restore_state - undo effects of acpi_save_state_mem ++ */ ++void acpi_restore_state_mem(void) ++{ ++} ++ ++/** ++ * acpi_reserve_bootmem - do _very_ early ACPI initialisation ++ * ++ * We allocate a page from the first 1MB of memory for the wakeup ++ * routine for when we come back from a sleep state. The ++ * runtime allocator allows specification of <16MB pages, but not ++ * <1MB pages. 
++ */ ++void __init acpi_reserve_bootmem(void) ++{ ++#ifndef CONFIG_ACPI_PV_SLEEP ++ if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) { ++ printk(KERN_ERR ++ "ACPI: Wakeup code way too big, S3 disabled.\n"); ++ return; ++ } ++ ++ acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); ++ if (!acpi_wakeup_address) ++ printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); ++#endif ++} ++ ++#ifndef CONFIG_ACPI_PV_SLEEP ++static int __init acpi_sleep_setup(char *str) ++{ ++ while ((str != NULL) && (*str != '\0')) { ++ if (strncmp(str, "s3_bios", 7) == 0) ++ acpi_video_flags = 1; ++ if (strncmp(str, "s3_mode", 7) == 0) ++ acpi_video_flags |= 2; ++ str = strchr(str, ','); ++ if (str != NULL) ++ str += strspn(str, ", \t"); ++ } ++ return 1; ++} ++ ++__setup("acpi_sleep=", acpi_sleep_setup); ++ ++static __init int reset_videomode_after_s3(struct dmi_system_id *d) ++{ ++ acpi_video_flags |= 2; ++ return 0; ++} ++ ++static __initdata struct dmi_system_id acpisleep_dmi_table[] = { ++ { /* Reset video mode after returning from ACPI S3 sleep */ ++ .callback = reset_videomode_after_s3, ++ .ident = "Toshiba Satellite 4030cdt", ++ .matches = { ++ DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), ++ }, ++ }, ++ {} ++}; ++ ++static int __init acpisleep_dmi_init(void) ++{ ++ dmi_check_system(acpisleep_dmi_table); ++ return 0; ++} ++ ++core_initcall(acpisleep_dmi_init); ++#endif /* CONFIG_ACPI_PV_SLEEP */ +Index: head-2008-11-25/arch/x86/kernel/apic_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/apic_32-xen.c 2007-06-12 13:12:48.000000000 +0200 +@@ -0,0 +1,155 @@ ++/* ++ * Local APIC handling, local APIC timers ++ * ++ * (c) 1999, 2000 Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. Tews ++ * for testing these extensively. ++ * Maciej W. Rozycki : Various updates and fixes. 
++ * Mikael Pettersson : Power Management for UP-APIC. ++ * Pavel Machek and ++ * Mikael Pettersson : PM converted to driver model. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include "io_ports.h" ++ ++#ifndef CONFIG_XEN ++/* ++ * cpu_mask that denotes the CPUs that needs timer interrupt coming in as ++ * IPIs in place of local APIC timers ++ */ ++static cpumask_t timer_bcast_ipi; ++#endif ++ ++/* ++ * Knob to control our willingness to enable the local APIC. ++ */ ++int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ ++ ++/* ++ * Debug level ++ */ ++int apic_verbosity; ++ ++#ifndef CONFIG_XEN ++static int modern_apic(void) ++{ ++ unsigned int lvr, version; ++ /* AMD systems use old APIC versions, so check the CPU */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && ++ boot_cpu_data.x86 >= 0xf) ++ return 1; ++ lvr = apic_read(APIC_LVR); ++ version = GET_APIC_VERSION(lvr); ++ return version >= 0x14; ++} ++#endif /* !CONFIG_XEN */ ++ ++/* ++ * 'what should we do if we get a hw irq event on an illegal vector'. ++ * each architecture has to answer this themselves. ++ */ ++void ack_bad_irq(unsigned int irq) ++{ ++ printk("unexpected IRQ trap at vector %02x\n", irq); ++ /* ++ * Currently unexpected vectors happen only on SMP and APIC. ++ * We _must_ ack these because every local APIC has only N ++ * irq slots per priority level, and a 'hanging, unacked' IRQ ++ * holds up an irq slot - in excessive cases (when multiple ++ * unexpected vectors occur) that might lock up the APIC ++ * completely. 
++ * But only ack when the APIC is enabled -AK ++ */ ++ if (cpu_has_apic) ++ ack_APIC_irq(); ++} ++ ++int get_physical_broadcast(void) ++{ ++ return 0xff; ++} ++ ++#ifndef CONFIG_XEN ++#ifndef CONFIG_SMP ++static void up_apic_timer_interrupt_call(struct pt_regs *regs) ++{ ++ int cpu = smp_processor_id(); ++ ++ /* ++ * the NMI deadlock-detector uses this. ++ */ ++ per_cpu(irq_stat, cpu).apic_timer_irqs++; ++ ++ smp_local_timer_interrupt(regs); ++} ++#endif ++ ++void smp_send_timer_broadcast_ipi(struct pt_regs *regs) ++{ ++ cpumask_t mask; ++ ++ cpus_and(mask, cpu_online_map, timer_bcast_ipi); ++ if (!cpus_empty(mask)) { ++#ifdef CONFIG_SMP ++ send_IPI_mask(mask, LOCAL_TIMER_VECTOR); ++#else ++ /* ++ * We can directly call the apic timer interrupt handler ++ * in UP case. Minus all irq related functions ++ */ ++ up_apic_timer_interrupt_call(regs); ++#endif ++ } ++} ++#endif ++ ++int setup_profiling_timer(unsigned int multiplier) ++{ ++ return -EINVAL; ++} ++ ++/* ++ * This initializes the IO-APIC and APIC hardware if this is ++ * a UP kernel. 
++ */ ++int __init APIC_init_uniprocessor (void) ++{ ++#ifdef CONFIG_X86_IO_APIC ++ if (smp_found_config) ++ if (!skip_ioapic_setup && nr_ioapics) ++ setup_IO_APIC(); ++#endif ++ ++ return 0; ++} +Index: head-2008-11-25/arch/x86/kernel/cpu/common-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/cpu/common-xen.c 2007-12-10 08:47:31.000000000 +0100 +@@ -0,0 +1,743 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_X86_LOCAL_APIC ++#include ++#include ++#include ++#else ++#ifdef CONFIG_XEN ++#define phys_pkg_id(a,b) a ++#endif ++#endif ++#include ++ ++#include "cpu.h" ++ ++DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); ++EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); ++ ++#ifndef CONFIG_XEN ++DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); ++EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); ++#endif ++ ++static int cachesize_override __cpuinitdata = -1; ++static int disable_x86_fxsr __cpuinitdata; ++static int disable_x86_serial_nr __cpuinitdata = 1; ++static int disable_x86_sep __cpuinitdata; ++ ++struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; ++ ++extern int disable_pse; ++ ++static void default_init(struct cpuinfo_x86 * c) ++{ ++ /* Not much we can do here... */ ++ /* Check if at least it has cpuid */ ++ if (c->cpuid_level == -1) { ++ /* No cpuid. 
It must be an ancient CPU */ ++ if (c->x86 == 4) ++ strcpy(c->x86_model_id, "486"); ++ else if (c->x86 == 3) ++ strcpy(c->x86_model_id, "386"); ++ } ++} ++ ++static struct cpu_dev default_cpu = { ++ .c_init = default_init, ++ .c_vendor = "Unknown", ++}; ++static struct cpu_dev * this_cpu = &default_cpu; ++ ++static int __init cachesize_setup(char *str) ++{ ++ get_option (&str, &cachesize_override); ++ return 1; ++} ++__setup("cachesize=", cachesize_setup); ++ ++int __cpuinit get_model_name(struct cpuinfo_x86 *c) ++{ ++ unsigned int *v; ++ char *p, *q; ++ ++ if (cpuid_eax(0x80000000) < 0x80000004) ++ return 0; ++ ++ v = (unsigned int *) c->x86_model_id; ++ cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); ++ cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); ++ cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); ++ c->x86_model_id[48] = 0; ++ ++ /* Intel chips right-justify this string for some dumb reason; ++ undo that brain damage */ ++ p = q = &c->x86_model_id[0]; ++ while ( *p == ' ' ) ++ p++; ++ if ( p != q ) { ++ while ( *p ) ++ *q++ = *p++; ++ while ( q <= &c->x86_model_id[48] ) ++ *q++ = '\0'; /* Zero-pad the rest */ ++ } ++ ++ return 1; ++} ++ ++ ++void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) ++{ ++ unsigned int n, dummy, ecx, edx, l2size; ++ ++ n = cpuid_eax(0x80000000); ++ ++ if (n >= 0x80000005) { ++ cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); ++ printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", ++ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); ++ c->x86_cache_size=(ecx>>24)+(edx>>24); ++ } ++ ++ if (n < 0x80000006) /* Some chips just has a large L1. */ ++ return; ++ ++ ecx = cpuid_ecx(0x80000006); ++ l2size = ecx >> 16; ++ ++ /* do processor-specific cache resizing */ ++ if (this_cpu->c_size_cache) ++ l2size = this_cpu->c_size_cache(c,l2size); ++ ++ /* Allow user to override all this if necessary. 
*/ ++ if (cachesize_override != -1) ++ l2size = cachesize_override; ++ ++ if ( l2size == 0 ) ++ return; /* Again, no L2 cache is possible */ ++ ++ c->x86_cache_size = l2size; ++ ++ printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", ++ l2size, ecx & 0xFF); ++} ++ ++/* Naming convention should be: [()] */ ++/* This table only is used unless init_() below doesn't set it; */ ++/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ ++ ++/* Look up CPU names by table lookup. */ ++static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) ++{ ++ struct cpu_model_info *info; ++ ++ if ( c->x86_model >= 16 ) ++ return NULL; /* Range check */ ++ ++ if (!this_cpu) ++ return NULL; ++ ++ info = this_cpu->c_models; ++ ++ while (info && info->family) { ++ if (info->family == c->x86) ++ return info->model_names[c->x86_model]; ++ info++; ++ } ++ return NULL; /* Not found */ ++} ++ ++ ++static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) ++{ ++ char *v = c->x86_vendor_id; ++ int i; ++ static int printed; ++ ++ for (i = 0; i < X86_VENDOR_NUM; i++) { ++ if (cpu_devs[i]) { ++ if (!strcmp(v,cpu_devs[i]->c_ident[0]) || ++ (cpu_devs[i]->c_ident[1] && ++ !strcmp(v,cpu_devs[i]->c_ident[1]))) { ++ c->x86_vendor = i; ++ if (!early) ++ this_cpu = cpu_devs[i]; ++ return; ++ } ++ } ++ } ++ if (!printed) { ++ printed++; ++ printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); ++ printk(KERN_ERR "CPU: Your system may be unstable.\n"); ++ } ++ c->x86_vendor = X86_VENDOR_UNKNOWN; ++ this_cpu = &default_cpu; ++} ++ ++ ++static int __init x86_fxsr_setup(char * s) ++{ ++ disable_x86_fxsr = 1; ++ return 1; ++} ++__setup("nofxsr", x86_fxsr_setup); ++ ++ ++static int __init x86_sep_setup(char * s) ++{ ++ disable_x86_sep = 1; ++ return 1; ++} ++__setup("nosep", x86_sep_setup); ++ ++ ++/* Standard macro to see if a specific flag is changeable */ ++static inline int flag_is_changeable_p(u32 flag) ++{ ++ u32 f1, f2; ++ ++ asm("pushfl\n\t" ++ 
"pushfl\n\t" ++ "popl %0\n\t" ++ "movl %0,%1\n\t" ++ "xorl %2,%0\n\t" ++ "pushl %0\n\t" ++ "popfl\n\t" ++ "pushfl\n\t" ++ "popl %0\n\t" ++ "popfl\n\t" ++ : "=&r" (f1), "=&r" (f2) ++ : "ir" (flag)); ++ ++ return ((f1^f2) & flag) != 0; ++} ++ ++ ++/* Probe for the CPUID instruction */ ++static int __cpuinit have_cpuid_p(void) ++{ ++ return flag_is_changeable_p(X86_EFLAGS_ID); ++} ++ ++/* Do minimum CPU detection early. ++ Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. ++ The others are not touched to avoid unwanted side effects. ++ ++ WARNING: this function is only called on the BP. Don't add code here ++ that is supposed to run on all CPUs. */ ++static void __init early_cpu_detect(void) ++{ ++ struct cpuinfo_x86 *c = &boot_cpu_data; ++ ++ c->x86_cache_alignment = 32; ++ ++ if (!have_cpuid_p()) ++ return; ++ ++ /* Get vendor name */ ++ cpuid(0x00000000, &c->cpuid_level, ++ (int *)&c->x86_vendor_id[0], ++ (int *)&c->x86_vendor_id[8], ++ (int *)&c->x86_vendor_id[4]); ++ ++ get_cpu_vendor(c, 1); ++ ++ c->x86 = 4; ++ if (c->cpuid_level >= 0x00000001) { ++ u32 junk, tfms, cap0, misc; ++ cpuid(0x00000001, &tfms, &misc, &junk, &cap0); ++ c->x86 = (tfms >> 8) & 15; ++ c->x86_model = (tfms >> 4) & 15; ++ if (c->x86 == 0xf) ++ c->x86 += (tfms >> 20) & 0xff; ++ if (c->x86 >= 0x6) ++ c->x86_model += ((tfms >> 16) & 0xF) << 4; ++ c->x86_mask = tfms & 15; ++ if (cap0 & (1<<19)) ++ c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; ++ } ++} ++ ++void __cpuinit generic_identify(struct cpuinfo_x86 * c) ++{ ++ u32 tfms, xlvl; ++ int ebx; ++ ++ if (have_cpuid_p()) { ++ /* Get vendor name */ ++ cpuid(0x00000000, &c->cpuid_level, ++ (int *)&c->x86_vendor_id[0], ++ (int *)&c->x86_vendor_id[8], ++ (int *)&c->x86_vendor_id[4]); ++ ++ get_cpu_vendor(c, 0); ++ /* Initialize the standard set of capabilities */ ++ /* Note that the vendor-specific code below might override */ ++ ++ /* Intel-defined flags: level 0x00000001 */ ++ if ( c->cpuid_level >= 0x00000001 
) { ++ u32 capability, excap; ++ cpuid(0x00000001, &tfms, &ebx, &excap, &capability); ++ c->x86_capability[0] = capability; ++ c->x86_capability[4] = excap; ++ c->x86 = (tfms >> 8) & 15; ++ c->x86_model = (tfms >> 4) & 15; ++ if (c->x86 == 0xf) ++ c->x86 += (tfms >> 20) & 0xff; ++ if (c->x86 >= 0x6) ++ c->x86_model += ((tfms >> 16) & 0xF) << 4; ++ c->x86_mask = tfms & 15; ++#ifdef CONFIG_X86_HT ++ c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); ++#else ++ c->apicid = (ebx >> 24) & 0xFF; ++#endif ++ } else { ++ /* Have CPUID level 0 only - unheard of */ ++ c->x86 = 4; ++ } ++ ++ /* AMD-defined flags: level 0x80000001 */ ++ xlvl = cpuid_eax(0x80000000); ++ if ( (xlvl & 0xffff0000) == 0x80000000 ) { ++ if ( xlvl >= 0x80000001 ) { ++ c->x86_capability[1] = cpuid_edx(0x80000001); ++ c->x86_capability[6] = cpuid_ecx(0x80000001); ++ } ++ if ( xlvl >= 0x80000004 ) ++ get_model_name(c); /* Default name */ ++ } ++ } ++ ++ early_intel_workaround(c); ++ ++#ifdef CONFIG_X86_HT ++ c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; ++#endif ++} ++ ++static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) ++{ ++ if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { ++ /* Disable processor serial number */ ++ unsigned long lo,hi; ++ rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); ++ lo |= 0x200000; ++ wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); ++ printk(KERN_NOTICE "CPU serial number disabled.\n"); ++ clear_bit(X86_FEATURE_PN, c->x86_capability); ++ ++ /* Disabling the serial number may affect the cpuid level */ ++ c->cpuid_level = cpuid_eax(0); ++ } ++} ++ ++static int __init x86_serial_nr_setup(char *s) ++{ ++ disable_x86_serial_nr = 0; ++ return 1; ++} ++__setup("serialnumber", x86_serial_nr_setup); ++ ++ ++ ++/* ++ * This does the hard work of actually picking apart the CPU stuff... 
++ */ ++void __cpuinit identify_cpu(struct cpuinfo_x86 *c) ++{ ++ int i; ++ ++ c->loops_per_jiffy = loops_per_jiffy; ++ c->x86_cache_size = -1; ++ c->x86_vendor = X86_VENDOR_UNKNOWN; ++ c->cpuid_level = -1; /* CPUID not detected */ ++ c->x86_model = c->x86_mask = 0; /* So far unknown... */ ++ c->x86_vendor_id[0] = '\0'; /* Unset */ ++ c->x86_model_id[0] = '\0'; /* Unset */ ++ c->x86_max_cores = 1; ++ memset(&c->x86_capability, 0, sizeof c->x86_capability); ++ ++ if (!have_cpuid_p()) { ++ /* First of all, decide if this is a 486 or higher */ ++ /* It's a 486 if we can modify the AC flag */ ++ if ( flag_is_changeable_p(X86_EFLAGS_AC) ) ++ c->x86 = 4; ++ else ++ c->x86 = 3; ++ } ++ ++ generic_identify(c); ++ ++ printk(KERN_DEBUG "CPU: After generic identify, caps:"); ++ for (i = 0; i < NCAPINTS; i++) ++ printk(" %08lx", c->x86_capability[i]); ++ printk("\n"); ++ ++ if (this_cpu->c_identify) { ++ this_cpu->c_identify(c); ++ ++ printk(KERN_DEBUG "CPU: After vendor identify, caps:"); ++ for (i = 0; i < NCAPINTS; i++) ++ printk(" %08lx", c->x86_capability[i]); ++ printk("\n"); ++ } ++ ++ /* ++ * Vendor-specific initialization. In this section we ++ * canonicalize the feature flags, meaning if there are ++ * features a certain CPU supports which CPUID doesn't ++ * tell us, CPUID claiming incorrect flags, or other bugs, ++ * we handle them here. ++ * ++ * At the end of this section, c->x86_capability better ++ * indicate the features this CPU genuinely supports! ++ */ ++ if (this_cpu->c_init) ++ this_cpu->c_init(c); ++ ++ /* Disable the PN if appropriate */ ++ squash_the_stupid_serial_number(c); ++ ++ /* ++ * The vendor-specific functions might have changed features. Now ++ * we do "generic changes." ++ */ ++ ++ /* TSC disabled? */ ++ if ( tsc_disable ) ++ clear_bit(X86_FEATURE_TSC, c->x86_capability); ++ ++ /* FXSR disabled? 
*/ ++ if (disable_x86_fxsr) { ++ clear_bit(X86_FEATURE_FXSR, c->x86_capability); ++ clear_bit(X86_FEATURE_XMM, c->x86_capability); ++ } ++ ++ /* SEP disabled? */ ++ if (disable_x86_sep) ++ clear_bit(X86_FEATURE_SEP, c->x86_capability); ++ ++ if (disable_pse) ++ clear_bit(X86_FEATURE_PSE, c->x86_capability); ++ ++ /* If the model name is still unset, do table lookup. */ ++ if ( !c->x86_model_id[0] ) { ++ char *p; ++ p = table_lookup_model(c); ++ if ( p ) ++ strcpy(c->x86_model_id, p); ++ else ++ /* Last resort... */ ++ sprintf(c->x86_model_id, "%02x/%02x", ++ c->x86, c->x86_model); ++ } ++ ++ /* Now the feature flags better reflect actual CPU features! */ ++ ++ printk(KERN_DEBUG "CPU: After all inits, caps:"); ++ for (i = 0; i < NCAPINTS; i++) ++ printk(" %08lx", c->x86_capability[i]); ++ printk("\n"); ++ ++ /* ++ * On SMP, boot_cpu_data holds the common feature set between ++ * all CPUs; so make sure that we indicate which features are ++ * common between the CPUs. The first time this routine gets ++ * executed, c == &boot_cpu_data. ++ */ ++ if ( c != &boot_cpu_data ) { ++ /* AND the already accumulated flags with these */ ++ for ( i = 0 ; i < NCAPINTS ; i++ ) ++ boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; ++ } ++ ++ /* Init Machine Check Exception if available. 
*/ ++ mcheck_init(c); ++ ++ if (c == &boot_cpu_data) ++ sysenter_setup(); ++ enable_sep_cpu(); ++ ++ if (c == &boot_cpu_data) ++ mtrr_bp_init(); ++ else ++ mtrr_ap_init(); ++} ++ ++#ifdef CONFIG_X86_HT ++void __cpuinit detect_ht(struct cpuinfo_x86 *c) ++{ ++ u32 eax, ebx, ecx, edx; ++ int index_msb, core_bits; ++ ++ cpuid(1, &eax, &ebx, &ecx, &edx); ++ ++ if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) ++ return; ++ ++ smp_num_siblings = (ebx & 0xff0000) >> 16; ++ ++ if (smp_num_siblings == 1) { ++ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); ++ } else if (smp_num_siblings > 1 ) { ++ ++ if (smp_num_siblings > NR_CPUS) { ++ printk(KERN_WARNING "CPU: Unsupported number of the " ++ "siblings %d", smp_num_siblings); ++ smp_num_siblings = 1; ++ return; ++ } ++ ++ index_msb = get_count_order(smp_num_siblings); ++ c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); ++ ++ printk(KERN_INFO "CPU: Physical Processor ID: %d\n", ++ c->phys_proc_id); ++ ++ smp_num_siblings = smp_num_siblings / c->x86_max_cores; ++ ++ index_msb = get_count_order(smp_num_siblings) ; ++ ++ core_bits = get_count_order(c->x86_max_cores); ++ ++ c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & ++ ((1 << core_bits) - 1); ++ ++ if (c->x86_max_cores > 1) ++ printk(KERN_INFO "CPU: Processor Core ID: %d\n", ++ c->cpu_core_id); ++ } ++} ++#endif ++ ++void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) ++{ ++ char *vendor = NULL; ++ ++ if (c->x86_vendor < X86_VENDOR_NUM) ++ vendor = this_cpu->c_vendor; ++ else if (c->cpuid_level >= 0) ++ vendor = c->x86_vendor_id; ++ ++ if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) ++ printk("%s ", vendor); ++ ++ if (!c->x86_model_id[0]) ++ printk("%d86", c->x86); ++ else ++ printk("%s", c->x86_model_id); ++ ++ if (c->x86_mask || c->cpuid_level >= 0) ++ printk(" stepping %02x\n", c->x86_mask); ++ else ++ printk("\n"); ++} ++ ++cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; ++ ++/* This is 
hacky. :) ++ * We're emulating future behavior. ++ * In the future, the cpu-specific init functions will be called implicitly ++ * via the magic of initcalls. ++ * They will insert themselves into the cpu_devs structure. ++ * Then, when cpu_init() is called, we can just iterate over that array. ++ */ ++ ++extern int intel_cpu_init(void); ++extern int cyrix_init_cpu(void); ++extern int nsc_init_cpu(void); ++extern int amd_init_cpu(void); ++extern int centaur_init_cpu(void); ++extern int transmeta_init_cpu(void); ++extern int rise_init_cpu(void); ++extern int nexgen_init_cpu(void); ++extern int umc_init_cpu(void); ++ ++void __init early_cpu_init(void) ++{ ++ intel_cpu_init(); ++ cyrix_init_cpu(); ++ nsc_init_cpu(); ++ amd_init_cpu(); ++ centaur_init_cpu(); ++ transmeta_init_cpu(); ++ rise_init_cpu(); ++ nexgen_init_cpu(); ++ umc_init_cpu(); ++ early_cpu_detect(); ++ ++#ifdef CONFIG_DEBUG_PAGEALLOC ++ /* pse is not compatible with on-the-fly unmapping, ++ * disable it even if the cpus claim to support it. ++ */ ++ clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); ++ disable_pse = 1; ++#endif ++} ++ ++static void __cpuinit cpu_gdt_init(const struct Xgt_desc_struct *gdt_descr) ++{ ++ unsigned long frames[16]; ++ unsigned long va; ++ int f; ++ ++ for (va = gdt_descr->address, f = 0; ++ va < gdt_descr->address + gdt_descr->size; ++ va += PAGE_SIZE, f++) { ++ frames[f] = virt_to_mfn(va); ++ make_lowmem_page_readonly( ++ (void *)va, XENFEAT_writable_descriptor_tables); ++ } ++ if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) / 8)) ++ BUG(); ++} ++ ++/* ++ * cpu_init() initializes state that is per-CPU. Some data is already ++ * initialized (naturally) in the bootstrap process, such as the GDT ++ * and IDT. We reload them nevertheless, this function acts as a ++ * 'CPU state barrier', nothing should get across. 
++ */ ++void __cpuinit cpu_init(void) ++{ ++ int cpu = smp_processor_id(); ++#ifndef CONFIG_X86_NO_TSS ++ struct tss_struct * t = &per_cpu(init_tss, cpu); ++#endif ++ struct thread_struct *thread = ¤t->thread; ++ struct desc_struct *gdt; ++ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); ++ ++ if (cpu_test_and_set(cpu, cpu_initialized)) { ++ printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); ++ for (;;) local_irq_enable(); ++ } ++ printk(KERN_INFO "Initializing CPU#%d\n", cpu); ++ ++ if (cpu_has_vme || cpu_has_de) ++ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); ++ if (tsc_disable && cpu_has_tsc) { ++ printk(KERN_NOTICE "Disabling TSC...\n"); ++ /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ ++ clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); ++ set_in_cr4(X86_CR4_TSD); ++ } ++ ++#ifndef CONFIG_XEN ++ /* The CPU hotplug case */ ++ if (cpu_gdt_descr->address) { ++ gdt = (struct desc_struct *)cpu_gdt_descr->address; ++ memset(gdt, 0, PAGE_SIZE); ++ goto old_gdt; ++ } ++ /* ++ * This is a horrible hack to allocate the GDT. 
The problem ++ * is that cpu_init() is called really early for the boot CPU ++ * (and hence needs bootmem) but much later for the secondary ++ * CPUs, when bootmem will have gone away ++ */ ++ if (NODE_DATA(0)->bdata->node_bootmem_map) { ++ gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); ++ /* alloc_bootmem_pages panics on failure, so no check */ ++ memset(gdt, 0, PAGE_SIZE); ++ } else { ++ gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); ++ if (unlikely(!gdt)) { ++ printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); ++ for (;;) ++ local_irq_enable(); ++ } ++ } ++old_gdt: ++ /* ++ * Initialize the per-CPU GDT with the boot GDT, ++ * and set up the GDT descriptor: ++ */ ++ memcpy(gdt, cpu_gdt_table, GDT_SIZE); ++ ++ /* Set up GDT entry for 16bit stack */ ++ *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= ++ ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | ++ ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | ++ (CPU_16BIT_STACK_SIZE - 1); ++ ++ cpu_gdt_descr->size = GDT_SIZE - 1; ++ cpu_gdt_descr->address = (unsigned long)gdt; ++#else ++ if (cpu == 0 && cpu_gdt_descr->address == 0) { ++ gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); ++ /* alloc_bootmem_pages panics on failure, so no check */ ++ memset(gdt, 0, PAGE_SIZE); ++ ++ memcpy(gdt, cpu_gdt_table, GDT_SIZE); ++ ++ cpu_gdt_descr->size = GDT_SIZE; ++ cpu_gdt_descr->address = (unsigned long)gdt; ++ } ++#endif ++ ++ cpu_gdt_init(cpu_gdt_descr); ++ ++ /* ++ * Set up and load the per-CPU TSS and LDT ++ */ ++ atomic_inc(&init_mm.mm_count); ++ current->active_mm = &init_mm; ++ if (current->mm) ++ BUG(); ++ enter_lazy_tlb(&init_mm, current); ++ ++ load_esp0(t, thread); ++ ++ load_LDT(&init_mm.context); ++ ++#ifdef CONFIG_DOUBLEFAULT ++ /* Set up doublefault TSS pointer in the GDT */ ++ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); ++#endif ++ ++ /* Clear %fs and %gs. 
*/ ++ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); ++ ++ /* Clear all 6 debug registers: */ ++ set_debugreg(0, 0); ++ set_debugreg(0, 1); ++ set_debugreg(0, 2); ++ set_debugreg(0, 3); ++ set_debugreg(0, 6); ++ set_debugreg(0, 7); ++ ++ /* ++ * Force FPU initialization: ++ */ ++ current_thread_info()->status = 0; ++ clear_used_math(); ++ mxcsr_feature_mask_init(); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++void __cpuinit cpu_uninit(void) ++{ ++ int cpu = raw_smp_processor_id(); ++ cpu_clear(cpu, cpu_initialized); ++ ++ /* lazy TLB state */ ++ per_cpu(cpu_tlbstate, cpu).state = 0; ++ per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; ++} ++#endif +Index: head-2008-11-25/arch/x86/kernel/cpu/mtrr/main-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-01-28 12:24:18.000000000 +0100 +@@ -0,0 +1,198 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include "mtrr.h" ++ ++static DEFINE_MUTEX(mtrr_mutex); ++ ++void generic_get_mtrr(unsigned int reg, unsigned long *base, ++ unsigned int *size, mtrr_type * type) ++{ ++ struct xen_platform_op op; ++ ++ op.cmd = XENPF_read_memtype; ++ op.u.read_memtype.reg = reg; ++ if (unlikely(HYPERVISOR_platform_op(&op))) ++ memset(&op.u.read_memtype, 0, sizeof(op.u.read_memtype)); ++ ++ *size = op.u.read_memtype.nr_mfns; ++ *base = op.u.read_memtype.mfn; ++ *type = op.u.read_memtype.type; ++} ++ ++struct mtrr_ops generic_mtrr_ops = { ++ .use_intel_if = 1, ++ .get = generic_get_mtrr, ++}; ++ ++struct mtrr_ops *mtrr_if = &generic_mtrr_ops; ++unsigned int num_var_ranges; ++unsigned int *usage_table; ++ ++static void __init set_num_var_ranges(void) ++{ ++ struct xen_platform_op op; ++ ++ for (num_var_ranges = 0; ; num_var_ranges++) { ++ op.cmd = XENPF_read_memtype; ++ op.u.read_memtype.reg = num_var_ranges; ++ if (HYPERVISOR_platform_op(&op) != 0) ++ 
break; ++ } ++} ++ ++static void __init init_table(void) ++{ ++ int i, max; ++ ++ max = num_var_ranges; ++ if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) ++ == NULL) { ++ printk(KERN_ERR "mtrr: could not allocate\n"); ++ return; ++ } ++ for (i = 0; i < max; i++) ++ usage_table[i] = 0; ++} ++ ++int mtrr_add_page(unsigned long base, unsigned long size, ++ unsigned int type, char increment) ++{ ++ int error; ++ struct xen_platform_op op; ++ ++ mutex_lock(&mtrr_mutex); ++ ++ op.cmd = XENPF_add_memtype; ++ op.u.add_memtype.mfn = base; ++ op.u.add_memtype.nr_mfns = size; ++ op.u.add_memtype.type = type; ++ error = HYPERVISOR_platform_op(&op); ++ if (error) { ++ mutex_unlock(&mtrr_mutex); ++ BUG_ON(error > 0); ++ return error; ++ } ++ ++ if (increment) ++ ++usage_table[op.u.add_memtype.reg]; ++ ++ mutex_unlock(&mtrr_mutex); ++ ++ return op.u.add_memtype.reg; ++} ++ ++static int mtrr_check(unsigned long base, unsigned long size) ++{ ++ if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { ++ printk(KERN_WARNING ++ "mtrr: size and base must be multiples of 4 kiB\n"); ++ printk(KERN_DEBUG ++ "mtrr: size: 0x%lx base: 0x%lx\n", size, base); ++ dump_stack(); ++ return -1; ++ } ++ return 0; ++} ++ ++int ++mtrr_add(unsigned long base, unsigned long size, unsigned int type, ++ char increment) ++{ ++ if (mtrr_check(base, size)) ++ return -EINVAL; ++ return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, ++ increment); ++} ++ ++int mtrr_del_page(int reg, unsigned long base, unsigned long size) ++{ ++ unsigned i; ++ mtrr_type ltype; ++ unsigned long lbase; ++ unsigned int lsize; ++ int error = -EINVAL; ++ struct xen_platform_op op; ++ ++ mutex_lock(&mtrr_mutex); ++ ++ if (reg < 0) { ++ /* Search for existing MTRR */ ++ for (i = 0; i < num_var_ranges; ++i) { ++ mtrr_if->get(i, &lbase, &lsize, <ype); ++ if (lbase == base && lsize == size) { ++ reg = i; ++ break; ++ } ++ } ++ if (reg < 0) { ++ printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 
found\n", base, ++ size); ++ goto out; ++ } ++ } ++ if (usage_table[reg] < 1) { ++ printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); ++ goto out; ++ } ++ if (--usage_table[reg] < 1) { ++ op.cmd = XENPF_del_memtype; ++ op.u.del_memtype.handle = 0; ++ op.u.del_memtype.reg = reg; ++ error = HYPERVISOR_platform_op(&op); ++ if (error) { ++ BUG_ON(error > 0); ++ goto out; ++ } ++ } ++ error = reg; ++ out: ++ mutex_unlock(&mtrr_mutex); ++ return error; ++} ++ ++int ++mtrr_del(int reg, unsigned long base, unsigned long size) ++{ ++ if (mtrr_check(base, size)) ++ return -EINVAL; ++ return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); ++} ++ ++EXPORT_SYMBOL(mtrr_add); ++EXPORT_SYMBOL(mtrr_del); ++ ++void __init mtrr_bp_init(void) ++{ ++} ++ ++void mtrr_ap_init(void) ++{ ++} ++ ++static int __init mtrr_init(void) ++{ ++ struct cpuinfo_x86 *c = &boot_cpu_data; ++ ++ if (!is_initial_xendomain()) ++ return -ENODEV; ++ ++ if ((!cpu_has(c, X86_FEATURE_MTRR)) && ++ (!cpu_has(c, X86_FEATURE_K6_MTRR)) && ++ (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) && ++ (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) ++ return -ENODEV; ++ ++ set_num_var_ranges(); ++ init_table(); ++ ++ return 0; ++} ++ ++subsys_initcall(mtrr_init); +Index: head-2008-11-25/arch/x86/kernel/entry_32-xen.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/entry_32-xen.S 2007-12-10 08:47:31.000000000 +0100 +@@ -0,0 +1,1238 @@ ++/* ++ * linux/arch/i386/entry.S ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ */ ++ ++/* ++ * entry.S contains the system-call and fault low-level handling routines. ++ * This also contains the timer-interrupt handler, as well as all interrupts ++ * and faults that can result in a task-switch. ++ * ++ * NOTE: This code handles signal-recognition, which happens every time ++ * after a timer-interrupt and after each system call. 
++ * ++ * I changed all the .align's to 4 (16 byte alignment), as that's faster ++ * on a 486. ++ * ++ * Stack layout in 'ret_from_system_call': ++ * ptrace needs to have all regs on the stack. ++ * if the order here is changed, it needs to be ++ * updated in fork.c:copy_process, signal.c:do_signal, ++ * ptrace.c and ptrace.h ++ * ++ * 0(%esp) - %ebx ++ * 4(%esp) - %ecx ++ * 8(%esp) - %edx ++ * C(%esp) - %esi ++ * 10(%esp) - %edi ++ * 14(%esp) - %ebp ++ * 18(%esp) - %eax ++ * 1C(%esp) - %ds ++ * 20(%esp) - %es ++ * 24(%esp) - orig_eax ++ * 28(%esp) - %eip ++ * 2C(%esp) - %cs ++ * 30(%esp) - %eflags ++ * 34(%esp) - %oldesp ++ * 38(%esp) - %oldss ++ * ++ * "current" is in register %ebx during any slow entries. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "irq_vectors.h" ++#include ++ ++#define nr_syscalls ((syscall_table_size)/4) ++ ++EBX = 0x00 ++ECX = 0x04 ++EDX = 0x08 ++ESI = 0x0C ++EDI = 0x10 ++EBP = 0x14 ++EAX = 0x18 ++DS = 0x1C ++ES = 0x20 ++ORIG_EAX = 0x24 ++EIP = 0x28 ++CS = 0x2C ++EFLAGS = 0x30 ++OLDESP = 0x34 ++OLDSS = 0x38 ++ ++CF_MASK = 0x00000001 ++TF_MASK = 0x00000100 ++IF_MASK = 0x00000200 ++DF_MASK = 0x00000400 ++NT_MASK = 0x00004000 ++VM_MASK = 0x00020000 ++/* Pseudo-eflags. */ ++NMI_MASK = 0x80000000 ++ ++#ifndef CONFIG_XEN ++#define DISABLE_INTERRUPTS cli ++#define ENABLE_INTERRUPTS sti ++#else ++/* Offsets into shared_info_t. 
*/ ++#define evtchn_upcall_pending /* 0 */ ++#define evtchn_upcall_mask 1 ++ ++#define sizeof_vcpu_shift 6 ++ ++#ifdef CONFIG_SMP ++#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ ++ shl $sizeof_vcpu_shift,%esi ; \ ++ addl HYPERVISOR_shared_info,%esi ++#else ++#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi ++#endif ++ ++#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) ++#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) ++#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \ ++ __DISABLE_INTERRUPTS ++#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \ ++ __ENABLE_INTERRUPTS ++#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) ++#endif ++ ++#ifdef CONFIG_PREEMPT ++#define preempt_stop cli; TRACE_IRQS_OFF ++#else ++#define preempt_stop ++#define resume_kernel restore_nocheck ++#endif ++ ++.macro TRACE_IRQS_IRET ++#ifdef CONFIG_TRACE_IRQFLAGS ++ testl $IF_MASK,EFLAGS(%esp) # interrupts off? ++ jz 1f ++ TRACE_IRQS_ON ++1: ++#endif ++.endm ++ ++#ifdef CONFIG_VM86 ++#define resume_userspace_sig check_userspace ++#else ++#define resume_userspace_sig resume_userspace ++#endif ++ ++#define SAVE_ALL \ ++ cld; \ ++ pushl %es; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ /*CFI_REL_OFFSET es, 0;*/\ ++ pushl %ds; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ /*CFI_REL_OFFSET ds, 0;*/\ ++ pushl %eax; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET eax, 0;\ ++ pushl %ebp; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET ebp, 0;\ ++ pushl %edi; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET edi, 0;\ ++ pushl %esi; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET esi, 0;\ ++ pushl %edx; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET edx, 0;\ ++ pushl %ecx; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET ecx, 0;\ ++ pushl %ebx; \ ++ CFI_ADJUST_CFA_OFFSET 4;\ ++ CFI_REL_OFFSET ebx, 0;\ ++ movl $(__USER_DS), %edx; \ ++ movl %edx, %ds; \ ++ movl %edx, %es; ++ ++#define RESTORE_INT_REGS \ ++ popl %ebx; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE ebx;\ ++ popl %ecx; \ ++ 
CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE ecx;\ ++ popl %edx; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE edx;\ ++ popl %esi; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE esi;\ ++ popl %edi; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE edi;\ ++ popl %ebp; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE ebp;\ ++ popl %eax; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ CFI_RESTORE eax ++ ++#define RESTORE_REGS \ ++ RESTORE_INT_REGS; \ ++1: popl %ds; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ /*CFI_RESTORE ds;*/\ ++2: popl %es; \ ++ CFI_ADJUST_CFA_OFFSET -4;\ ++ /*CFI_RESTORE es;*/\ ++.section .fixup,"ax"; \ ++3: movl $0,(%esp); \ ++ jmp 1b; \ ++4: movl $0,(%esp); \ ++ jmp 2b; \ ++.previous; \ ++.section __ex_table,"a";\ ++ .align 4; \ ++ .long 1b,3b; \ ++ .long 2b,4b; \ ++.previous ++ ++#define RING0_INT_FRAME \ ++ CFI_STARTPROC simple;\ ++ CFI_DEF_CFA esp, 3*4;\ ++ /*CFI_OFFSET cs, -2*4;*/\ ++ CFI_OFFSET eip, -3*4 ++ ++#define RING0_EC_FRAME \ ++ CFI_STARTPROC simple;\ ++ CFI_DEF_CFA esp, 4*4;\ ++ /*CFI_OFFSET cs, -2*4;*/\ ++ CFI_OFFSET eip, -3*4 ++ ++#define RING0_PTREGS_FRAME \ ++ CFI_STARTPROC simple;\ ++ CFI_DEF_CFA esp, OLDESP-EBX;\ ++ /*CFI_OFFSET cs, CS-OLDESP;*/\ ++ CFI_OFFSET eip, EIP-OLDESP;\ ++ /*CFI_OFFSET es, ES-OLDESP;*/\ ++ /*CFI_OFFSET ds, DS-OLDESP;*/\ ++ CFI_OFFSET eax, EAX-OLDESP;\ ++ CFI_OFFSET ebp, EBP-OLDESP;\ ++ CFI_OFFSET edi, EDI-OLDESP;\ ++ CFI_OFFSET esi, ESI-OLDESP;\ ++ CFI_OFFSET edx, EDX-OLDESP;\ ++ CFI_OFFSET ecx, ECX-OLDESP;\ ++ CFI_OFFSET ebx, EBX-OLDESP ++ ++ENTRY(ret_from_fork) ++ CFI_STARTPROC ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ call schedule_tail ++ GET_THREAD_INFO(%ebp) ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++ pushl $0x0202 # Reset kernel eflags ++ CFI_ADJUST_CFA_OFFSET 4 ++ popfl ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp syscall_exit ++ CFI_ENDPROC ++ ++/* ++ * Return to user mode is not as complex as all this looks, ++ * but we want the default path for a system call return to ++ * go as quickly as possible which is why some of this is 
++ * less clear than it otherwise should be. ++ */ ++ ++ # userspace resumption stub bypassing syscall exit tracing ++ ALIGN ++ RING0_PTREGS_FRAME ++ret_from_exception: ++ preempt_stop ++ret_from_intr: ++ GET_THREAD_INFO(%ebp) ++check_userspace: ++ movl EFLAGS(%esp), %eax # mix EFLAGS and CS ++ movb CS(%esp), %al ++ testl $(VM_MASK | 2), %eax ++ jz resume_kernel ++ENTRY(resume_userspace) ++ DISABLE_INTERRUPTS # make sure we don't miss an interrupt ++ # setting need_resched or sigpending ++ # between sampling and the iret ++ movl TI_flags(%ebp), %ecx ++ andl $_TIF_WORK_MASK, %ecx # is there any work to be done on ++ # int/exception return? ++ jne work_pending ++ jmp restore_all ++ ++#ifdef CONFIG_PREEMPT ++ENTRY(resume_kernel) ++ cli ++ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? ++ jnz restore_nocheck ++need_resched: ++ movl TI_flags(%ebp), %ecx # need_resched set ? ++ testb $_TIF_NEED_RESCHED, %cl ++ jz restore_all ++ testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? ++ jz restore_all ++ call preempt_schedule_irq ++ jmp need_resched ++#endif ++ CFI_ENDPROC ++ ++/* SYSENTER_RETURN points to after the "sysenter" instruction in ++ the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ ++ ++ # sysenter call handler stub ++ENTRY(sysenter_entry) ++ CFI_STARTPROC simple ++ CFI_DEF_CFA esp, 0 ++ CFI_REGISTER esp, ebp ++ movl SYSENTER_stack_esp0(%esp),%esp ++sysenter_past_esp: ++ /* ++ * No need to follow this irqs on/off section: the syscall ++ * disabled irqs and here we enable it straight after entry: ++ */ ++ sti ++ pushl $(__USER_DS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET ss, 0*/ ++ pushl %ebp ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET esp, 0 ++ pushfl ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $(__USER_CS) ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET cs, 0*/ ++ /* ++ * Push current_thread_info()->sysenter_return to the stack. 
++ * A tiny bit of offset fixup is necessary - 4*4 means the 4 words ++ * pushed above; +8 corresponds to copy_thread's esp0 setting. ++ */ ++ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET eip, 0 ++ ++/* ++ * Load the potential sixth argument from user stack. ++ * Careful about security. ++ */ ++ cmpl $__PAGE_OFFSET-3,%ebp ++ jae syscall_fault ++1: movl (%ebp),%ebp ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,syscall_fault ++.previous ++ ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ GET_THREAD_INFO(%ebp) ++ ++ /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ ++ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) ++ jnz syscall_trace_entry ++ cmpl $(nr_syscalls), %eax ++ jae syscall_badsys ++ call *sys_call_table(,%eax,4) ++ movl %eax,EAX(%esp) ++ DISABLE_INTERRUPTS ++ TRACE_IRQS_OFF ++ movl TI_flags(%ebp), %ecx ++ testw $_TIF_ALLWORK_MASK, %cx ++ jne syscall_exit_work ++/* if something modifies registers it must also disable sysexit */ ++ movl EIP(%esp), %edx ++ movl OLDESP(%esp), %ecx ++ xorl %ebp,%ebp ++#ifdef CONFIG_XEN ++ TRACE_IRQS_ON ++ __ENABLE_INTERRUPTS ++sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ++ __TEST_PENDING ++ jnz 14f # process more events if necessary... ++ movl ESI(%esp), %esi ++ sysexit ++14: __DISABLE_INTERRUPTS ++ TRACE_IRQS_OFF ++sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ++ push %esp ++ call evtchn_do_upcall ++ add $4,%esp ++ jmp ret_from_intr ++#else ++ TRACE_IRQS_ON ++ sti ++ sysexit ++#endif /* !CONFIG_XEN */ ++ CFI_ENDPROC ++ ++ # pv sysenter call handler stub ++ENTRY(sysenter_entry_pv) ++ RING0_INT_FRAME ++ movl $__USER_DS,16(%esp) ++ movl %ebp,12(%esp) ++ movl $__USER_CS,4(%esp) ++ addl $4,%esp ++ /* +5*4 is SS:ESP,EFLAGS,CS:EIP. +8 is esp0 setting. */ ++ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) ++/* ++ * Load the potential sixth argument from user stack. 
++ * Careful about security. ++ */ ++ cmpl $__PAGE_OFFSET-3,%ebp ++ jae syscall_fault ++1: movl (%ebp),%ebp ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,syscall_fault ++.previous ++ /* fall through */ ++ CFI_ENDPROC ++ENDPROC(sysenter_entry_pv) ++ ++ # system call handler stub ++ENTRY(system_call) ++ RING0_INT_FRAME # can't unwind into user space anyway ++ pushl %eax # save orig_eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ GET_THREAD_INFO(%ebp) ++ testl $TF_MASK,EFLAGS(%esp) ++ jz no_singlestep ++ orl $_TIF_SINGLESTEP,TI_flags(%ebp) ++no_singlestep: ++ # system call tracing in operation / emulation ++ /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ ++ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) ++ jnz syscall_trace_entry ++ cmpl $(nr_syscalls), %eax ++ jae syscall_badsys ++syscall_call: ++ call *sys_call_table(,%eax,4) ++ movl %eax,EAX(%esp) # store the return value ++syscall_exit: ++ DISABLE_INTERRUPTS # make sure we don't miss an interrupt ++ # setting need_resched or sigpending ++ # between sampling and the iret ++ TRACE_IRQS_OFF ++ movl TI_flags(%ebp), %ecx ++ testw $_TIF_ALLWORK_MASK, %cx # current->work ++ jne syscall_exit_work ++ ++restore_all: ++#ifndef CONFIG_XEN ++ movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS ++ # Warning: OLDSS(%esp) contains the wrong/random values if we ++ # are returning to the kernel. ++ # See comments in process.c:copy_thread() for details. 
++ movb OLDSS(%esp), %ah ++ movb CS(%esp), %al ++ andl $(VM_MASK | (4 << 8) | 3), %eax ++ cmpl $((4 << 8) | 3), %eax ++ CFI_REMEMBER_STATE ++ je ldt_ss # returning to user-space with LDT SS ++restore_nocheck: ++#else ++restore_nocheck: ++ movl EFLAGS(%esp), %eax ++ testl $(VM_MASK|NMI_MASK), %eax ++ CFI_REMEMBER_STATE ++ jnz hypervisor_iret ++ shr $9, %eax # EAX[0] == IRET_EFLAGS.IF ++ GET_VCPU_INFO ++ andb evtchn_upcall_mask(%esi),%al ++ andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask ++ CFI_REMEMBER_STATE ++ jnz restore_all_enable_events # != 0 => enable event delivery ++#endif ++ TRACE_IRQS_IRET ++restore_nocheck_notrace: ++ RESTORE_REGS ++ addl $4, %esp ++ CFI_ADJUST_CFA_OFFSET -4 ++1: iret ++.section .fixup,"ax" ++iret_exc: ++#ifndef CONFIG_XEN ++ TRACE_IRQS_ON ++ sti ++#endif ++ pushl $0 # no error code ++ pushl $do_iret_error ++ jmp error_code ++.previous ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++ ++ CFI_RESTORE_STATE ++#ifndef CONFIG_XEN ++ldt_ss: ++ larl OLDSS(%esp), %eax ++ jnz restore_nocheck ++ testl $0x00400000, %eax # returning to 32bit stack? ++ jnz restore_nocheck # allright, normal return ++ /* If returning to userspace with 16bit stack, ++ * try to fix the higher word of ESP, as the CPU ++ * won't restore it. ++ * This is an "official" bug of all the x86-compatible ++ * CPUs, which we can try to work around to make ++ * dosemu and wine happy. */ ++ subl $8, %esp # reserve space for switch16 pointer ++ CFI_ADJUST_CFA_OFFSET 8 ++ cli ++ TRACE_IRQS_OFF ++ movl %esp, %eax ++ /* Set up the 16bit stack frame with switch32 pointer on top, ++ * and a switch16 pointer on top of the current frame. 
*/ ++ call setup_x86_bogus_stack ++ CFI_ADJUST_CFA_OFFSET -8 # frame has moved ++ TRACE_IRQS_IRET ++ RESTORE_REGS ++ lss 20+4(%esp), %esp # switch to 16bit stack ++1: iret ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++#else ++ ALIGN ++restore_all_enable_events: ++ TRACE_IRQS_ON ++ __ENABLE_INTERRUPTS ++scrit: /**** START OF CRITICAL REGION ****/ ++ __TEST_PENDING ++ jnz 14f # process more events if necessary... ++ RESTORE_REGS ++ addl $4, %esp ++ CFI_ADJUST_CFA_OFFSET -4 ++1: iret ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++14: __DISABLE_INTERRUPTS ++ TRACE_IRQS_OFF ++ jmp 11f ++ecrit: /**** END OF CRITICAL REGION ****/ ++ ++ CFI_RESTORE_STATE ++hypervisor_iret: ++ andl $~NMI_MASK, EFLAGS(%esp) ++ RESTORE_REGS ++ addl $4, %esp ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp hypercall_page + (__HYPERVISOR_iret * 32) ++#endif ++ CFI_ENDPROC ++ ++ # perform work that needs to be done immediately before resumption ++ ALIGN ++ RING0_PTREGS_FRAME # can't unwind into user space anyway ++work_pending: ++ testb $_TIF_NEED_RESCHED, %cl ++ jz work_notifysig ++work_resched: ++ call schedule ++ DISABLE_INTERRUPTS # make sure we don't miss an interrupt ++ # setting need_resched or sigpending ++ # between sampling and the iret ++ TRACE_IRQS_OFF ++ movl TI_flags(%ebp), %ecx ++ andl $_TIF_WORK_MASK, %ecx # is there any work to be done other ++ # than syscall tracing? 
++ jz restore_all ++ testb $_TIF_NEED_RESCHED, %cl ++ jnz work_resched ++ ++work_notifysig: # deal with pending signals and ++ # notify-resume requests ++ testl $VM_MASK, EFLAGS(%esp) ++ movl %esp, %eax ++ jne work_notifysig_v86 # returning to kernel-space or ++ # vm86-space ++ xorl %edx, %edx ++ call do_notify_resume ++ jmp resume_userspace_sig ++ ++ ALIGN ++work_notifysig_v86: ++#ifdef CONFIG_VM86 ++ pushl %ecx # save ti_flags for do_notify_resume ++ CFI_ADJUST_CFA_OFFSET 4 ++ call save_v86_state # %eax contains pt_regs pointer ++ popl %ecx ++ CFI_ADJUST_CFA_OFFSET -4 ++ movl %eax, %esp ++ xorl %edx, %edx ++ call do_notify_resume ++ jmp resume_userspace_sig ++#endif ++ ++ # perform syscall exit tracing ++ ALIGN ++syscall_trace_entry: ++ movl $-ENOSYS,EAX(%esp) ++ movl %esp, %eax ++ xorl %edx,%edx ++ call do_syscall_trace ++ cmpl $0, %eax ++ jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, ++ # so must skip actual syscall ++ movl ORIG_EAX(%esp), %eax ++ cmpl $(nr_syscalls), %eax ++ jnae syscall_call ++ jmp syscall_exit ++ ++ # perform syscall exit tracing ++ ALIGN ++syscall_exit_work: ++ testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl ++ jz work_pending ++ TRACE_IRQS_ON ++ ENABLE_INTERRUPTS # could let do_syscall_trace() call ++ # schedule() instead ++ movl %esp, %eax ++ movl $1, %edx ++ call do_syscall_trace ++ jmp resume_userspace ++ CFI_ENDPROC ++ ++ RING0_INT_FRAME # can't unwind into user space anyway ++syscall_fault: ++ pushl %eax # save orig_eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ GET_THREAD_INFO(%ebp) ++ movl $-EFAULT,EAX(%esp) ++ jmp resume_userspace ++ ++syscall_badsys: ++ movl $-ENOSYS,EAX(%esp) ++ jmp resume_userspace ++ CFI_ENDPROC ++ ++#ifndef CONFIG_XEN ++#define FIXUP_ESPFIX_STACK \ ++ movl %esp, %eax; \ ++ /* switch to 32bit stack using the pointer on top of 16bit stack */ \ ++ lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ ++ /* copy data from 16bit stack to 32bit stack */ \ ++ call fixup_x86_bogus_stack; \ ++ /* 
put ESP to the proper location */ \ ++ movl %eax, %esp; ++#define UNWIND_ESPFIX_STACK \ ++ pushl %eax; \ ++ CFI_ADJUST_CFA_OFFSET 4; \ ++ movl %ss, %eax; \ ++ /* see if on 16bit stack */ \ ++ cmpw $__ESPFIX_SS, %ax; \ ++ je 28f; \ ++27: popl %eax; \ ++ CFI_ADJUST_CFA_OFFSET -4; \ ++.section .fixup,"ax"; \ ++28: movl $__KERNEL_DS, %eax; \ ++ movl %eax, %ds; \ ++ movl %eax, %es; \ ++ /* switch to 32bit stack */ \ ++ FIXUP_ESPFIX_STACK; \ ++ jmp 27b; \ ++.previous ++ ++/* ++ * Build the entry stubs and pointer table with ++ * some assembler magic. ++ */ ++.data ++ENTRY(interrupt) ++.text ++ ++vector=0 ++ENTRY(irq_entries_start) ++ RING0_INT_FRAME ++.rept NR_IRQS ++ ALIGN ++ .if vector ++ CFI_ADJUST_CFA_OFFSET -4 ++ .endif ++1: pushl $~(vector) ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp common_interrupt ++.data ++ .long 1b ++.text ++vector=vector+1 ++.endr ++ ++/* ++ * the CPU automatically disables interrupts when executing an IRQ vector, ++ * so IRQ-flags tracing has to follow that: ++ */ ++ ALIGN ++common_interrupt: ++ SAVE_ALL ++ TRACE_IRQS_OFF ++ movl %esp,%eax ++ call do_IRQ ++ jmp ret_from_intr ++ CFI_ENDPROC ++ ++#define BUILD_INTERRUPT(name, nr) \ ++ENTRY(name) \ ++ RING0_INT_FRAME; \ ++ pushl $~(nr); \ ++ CFI_ADJUST_CFA_OFFSET 4; \ ++ SAVE_ALL; \ ++ TRACE_IRQS_OFF \ ++ movl %esp,%eax; \ ++ call smp_/**/name; \ ++ jmp ret_from_intr; \ ++ CFI_ENDPROC ++ ++/* The include is where all of the SMP etc. 
interrupts come from */ ++#include "entry_arch.h" ++#else ++#define UNWIND_ESPFIX_STACK ++#endif ++ ++ENTRY(divide_error) ++ RING0_INT_FRAME ++ pushl $0 # no error code ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_divide_error ++ CFI_ADJUST_CFA_OFFSET 4 ++ ALIGN ++error_code: ++ pushl %ds ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET ds, 0*/ ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET eax, 0 ++ xorl %eax, %eax ++ pushl %ebp ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ebp, 0 ++ pushl %edi ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET edi, 0 ++ pushl %esi ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET esi, 0 ++ pushl %edx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET edx, 0 ++ decl %eax # eax = -1 ++ pushl %ecx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ecx, 0 ++ pushl %ebx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ebx, 0 ++ cld ++ pushl %es ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET es, 0*/ ++ UNWIND_ESPFIX_STACK ++ popl %ecx ++ CFI_ADJUST_CFA_OFFSET -4 ++ /*CFI_REGISTER es, ecx*/ ++ movl ES(%esp), %edi # get the function address ++ movl ORIG_EAX(%esp), %edx # get the error code ++ movl %eax, ORIG_EAX(%esp) ++ movl %ecx, ES(%esp) ++ /*CFI_REL_OFFSET es, ES*/ ++ movl $(__USER_DS), %ecx ++ movl %ecx, %ds ++ movl %ecx, %es ++ movl %esp,%eax # pt_regs pointer ++ call *%edi ++ jmp ret_from_exception ++ CFI_ENDPROC ++ ++#ifdef CONFIG_XEN ++# A note on the "critical region" in our callback handler. ++# We want to avoid stacking callback handlers due to events occurring ++# during handling of the last event. To do this, we keep events disabled ++# until we've done all processing. HOWEVER, we must enable events before ++# popping the stack frame (can't be done atomically) and so it would still ++# be possible to get enough handler activations to overflow the stack. ++# Although unlikely, bugs of that kind are hard to track down, so we'd ++# like to avoid the possibility. 
++# So, on entry to the handler we detect whether we interrupted an ++# existing activation in its critical region -- if so, we pop the current ++# activation and restart the handler using the previous one. ++# ++# The sysexit critical region is slightly different. sysexit ++# atomically removes the entire stack frame. If we interrupt in the ++# critical region we know that the entire frame is present and correct ++# so we can simply throw away the new one. ++ENTRY(hypervisor_callback) ++ RING0_INT_FRAME ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ movl EIP(%esp),%eax ++ cmpl $scrit,%eax ++ jb 11f ++ cmpl $ecrit,%eax ++ jb critical_region_fixup ++ cmpl $sysexit_scrit,%eax ++ jb 11f ++ cmpl $sysexit_ecrit,%eax ++ ja 11f ++ addl $OLDESP,%esp # Remove eflags...ebx from stack frame. ++11: push %esp ++ CFI_ADJUST_CFA_OFFSET 4 ++ call evtchn_do_upcall ++ add $4,%esp ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp ret_from_intr ++ CFI_ENDPROC ++ ++# [How we do the fixup]. We want to merge the current stack frame with the ++# just-interrupted frame. How we do this depends on where in the critical ++# region the interrupted handler was executing, and so how many saved ++# registers are in each frame. We do this quickly using the lookup table ++# 'critical_fixup_table'. For each byte offset in the critical region, it ++# provides the number of bytes which have already been popped from the ++# interrupted stack frame. 
++critical_region_fixup: ++ movzbl critical_fixup_table-scrit(%eax),%ecx # %eax contains num bytes popped ++ cmpb $0xff,%cl # 0xff => vcpu_info critical region ++ jne 15f ++ xorl %ecx,%ecx ++15: leal (%esp,%ecx),%esi # %esi points at end of src region ++ leal OLDESP(%esp),%edi # %edi points at end of dst region ++ shrl $2,%ecx # convert words to bytes ++ je 17f # skip loop if nothing to copy ++16: subl $4,%esi # pre-decrementing copy loop ++ subl $4,%edi ++ movl (%esi),%eax ++ movl %eax,(%edi) ++ loop 16b ++17: movl %edi,%esp # final %edi is top of merged stack ++ jmp 11b ++ ++.section .rodata,"a" ++critical_fixup_table: ++ .byte 0xff,0xff,0xff # testb $0xff,(%esi) = __TEST_PENDING ++ .byte 0xff,0xff # jnz 14f ++ .byte 0x00 # pop %ebx ++ .byte 0x04 # pop %ecx ++ .byte 0x08 # pop %edx ++ .byte 0x0c # pop %esi ++ .byte 0x10 # pop %edi ++ .byte 0x14 # pop %ebp ++ .byte 0x18 # pop %eax ++ .byte 0x1c # pop %ds ++ .byte 0x20 # pop %es ++ .byte 0x24,0x24,0x24 # add $4,%esp ++ .byte 0x28 # iret ++ .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi) ++ .byte 0x00,0x00 # jmp 11b ++.previous ++ ++# Hypervisor uses this for application faults while it executes. ++# We get here for two reasons: ++# 1. Fault while reloading DS, ES, FS or GS ++# 2. Fault while executing IRET ++# Category 1 we fix up by reattempting the load, and zeroing the segment ++# register if the load fails. ++# Category 2 we fix up by jumping to do_iret_error. We cannot use the ++# normal Linux return path in this case because if we use the IRET hypercall ++# to pop the stack frame we end up in an infinite loop of failsafe callbacks. ++# We distinguish between categories by maintaining a status value in EAX. 
++ENTRY(failsafe_callback) ++ pushl %eax ++ movl $1,%eax ++1: mov 4(%esp),%ds ++2: mov 8(%esp),%es ++3: mov 12(%esp),%fs ++4: mov 16(%esp),%gs ++ testl %eax,%eax ++ popl %eax ++ jz 5f ++ addl $16,%esp # EAX != 0 => Category 2 (Bad IRET) ++ jmp iret_exc ++5: addl $16,%esp # EAX == 0 => Category 1 (Bad segment) ++ RING0_INT_FRAME ++ pushl $0 ++ SAVE_ALL ++ jmp ret_from_exception ++.section .fixup,"ax"; \ ++6: xorl %eax,%eax; \ ++ movl %eax,4(%esp); \ ++ jmp 1b; \ ++7: xorl %eax,%eax; \ ++ movl %eax,8(%esp); \ ++ jmp 2b; \ ++8: xorl %eax,%eax; \ ++ movl %eax,12(%esp); \ ++ jmp 3b; \ ++9: xorl %eax,%eax; \ ++ movl %eax,16(%esp); \ ++ jmp 4b; \ ++.previous; \ ++.section __ex_table,"a"; \ ++ .align 4; \ ++ .long 1b,6b; \ ++ .long 2b,7b; \ ++ .long 3b,8b; \ ++ .long 4b,9b; \ ++.previous ++#endif ++ CFI_ENDPROC ++ ++ENTRY(coprocessor_error) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_coprocessor_error ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(simd_coprocessor_error) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_simd_coprocessor_error ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(device_not_available) ++ RING0_INT_FRAME ++ pushl $-1 # mark this as an int ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++#ifndef CONFIG_XEN ++ movl %cr0, %eax ++ testl $0x4, %eax # EM (math emulation bit) ++ je device_available_emulate ++ pushl $0 # temporary storage for ORIG_EIP ++ CFI_ADJUST_CFA_OFFSET 4 ++ call math_emulate ++ addl $4, %esp ++ CFI_ADJUST_CFA_OFFSET -4 ++ jmp ret_from_exception ++device_available_emulate: ++#endif ++ preempt_stop ++ call math_state_restore ++ jmp ret_from_exception ++ CFI_ENDPROC ++ ++#ifndef CONFIG_XEN ++/* ++ * Debug traps and NMI can happen at the one SYSENTER instruction ++ * that sets up the real kernel stack. Check here, since we can't ++ * allow the wrong stack to be used. 
++ * ++ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have ++ * already pushed 3 words if it hits on the sysenter instruction: ++ * eflags, cs and eip. ++ * ++ * We just load the right stack, and push the three (known) values ++ * by hand onto the new stack - while updating the return eip past ++ * the instruction that would have done it for sysenter. ++ */ ++#define FIX_STACK(offset, ok, label) \ ++ cmpw $__KERNEL_CS,4(%esp); \ ++ jne ok; \ ++label: \ ++ movl SYSENTER_stack_esp0+offset(%esp),%esp; \ ++ pushfl; \ ++ pushl $__KERNEL_CS; \ ++ pushl $sysenter_past_esp ++#endif /* CONFIG_XEN */ ++ ++KPROBE_ENTRY(debug) ++ RING0_INT_FRAME ++#ifndef CONFIG_XEN ++ cmpl $sysenter_entry,(%esp) ++ jne debug_stack_correct ++ FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) ++debug_stack_correct: ++#endif /* !CONFIG_XEN */ ++ pushl $-1 # mark this as an int ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # error code 0 ++ movl %esp,%eax # pt_regs pointer ++ call do_debug ++ jmp ret_from_exception ++ CFI_ENDPROC ++ .previous .text ++#ifndef CONFIG_XEN ++/* ++ * NMI is doubly nasty. It can happen _while_ we're handling ++ * a debug fault, and the debug fault hasn't yet been able to ++ * clear up the stack. So we first check whether we got an ++ * NMI on the sysenter entry path, but after that we need to ++ * check whether we got an NMI on the debug path where the debug ++ * fault happened on the sysenter path. ++ */ ++ENTRY(nmi) ++ RING0_INT_FRAME ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ movl %ss, %eax ++ cmpw $__ESPFIX_SS, %ax ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++ je nmi_16bit_stack ++ cmpl $sysenter_entry,(%esp) ++ je nmi_stack_fixup ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ movl %esp,%eax ++ /* Do not access memory above the end of our stack page, ++ * it might not exist. 
++ */ ++ andl $(THREAD_SIZE-1),%eax ++ cmpl $(THREAD_SIZE-20),%eax ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 ++ jae nmi_stack_correct ++ cmpl $sysenter_entry,12(%esp) ++ je nmi_debug_stack_check ++nmi_stack_correct: ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # zero error code ++ movl %esp,%eax # pt_regs pointer ++ call do_nmi ++ jmp restore_nocheck_notrace ++ CFI_ENDPROC ++ ++nmi_stack_fixup: ++ FIX_STACK(12,nmi_stack_correct, 1) ++ jmp nmi_stack_correct ++nmi_debug_stack_check: ++ cmpw $__KERNEL_CS,16(%esp) ++ jne nmi_stack_correct ++ cmpl $debug,(%esp) ++ jb nmi_stack_correct ++ cmpl $debug_esp_fix_insn,(%esp) ++ ja nmi_stack_correct ++ FIX_STACK(24,nmi_stack_correct, 1) ++ jmp nmi_stack_correct ++ ++nmi_16bit_stack: ++ RING0_INT_FRAME ++ /* create the pointer to lss back */ ++ pushl %ss ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl %esp ++ CFI_ADJUST_CFA_OFFSET 4 ++ movzwl %sp, %esp ++ addw $4, (%esp) ++ /* copy the iret frame of 12 bytes */ ++ .rept 3 ++ pushl 16(%esp) ++ CFI_ADJUST_CFA_OFFSET 4 ++ .endr ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ FIXUP_ESPFIX_STACK # %eax == %esp ++ CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved ++ xorl %edx,%edx # zero error code ++ call do_nmi ++ RESTORE_REGS ++ lss 12+4(%esp), %esp # back to 16bit stack ++1: iret ++ CFI_ENDPROC ++.section __ex_table,"a" ++ .align 4 ++ .long 1b,iret_exc ++.previous ++#else ++ENTRY(nmi) ++ RING0_INT_FRAME ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # zero error code ++ movl %esp,%eax # pt_regs pointer ++ call do_nmi ++ orl $NMI_MASK, EFLAGS(%esp) ++ jmp restore_all ++ CFI_ENDPROC ++#endif ++ ++KPROBE_ENTRY(int3) ++ RING0_INT_FRAME ++ pushl $-1 # mark this as an int ++ CFI_ADJUST_CFA_OFFSET 4 ++ SAVE_ALL ++ xorl %edx,%edx # zero error code ++ movl %esp,%eax # pt_regs pointer ++ call do_int3 ++ jmp ret_from_exception ++ CFI_ENDPROC ++ .previous .text ++ ++ENTRY(overflow) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ 
pushl $do_overflow ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(bounds) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_bounds ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(invalid_op) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_invalid_op ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(coprocessor_segment_overrun) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_coprocessor_segment_overrun ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(invalid_TSS) ++ RING0_EC_FRAME ++ pushl $do_invalid_TSS ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(segment_not_present) ++ RING0_EC_FRAME ++ pushl $do_segment_not_present ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++ENTRY(stack_segment) ++ RING0_EC_FRAME ++ pushl $do_stack_segment ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++KPROBE_ENTRY(general_protection) ++ RING0_EC_FRAME ++ pushl $do_general_protection ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ .previous .text ++ ++ENTRY(alignment_check) ++ RING0_EC_FRAME ++ pushl $do_alignment_check ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++KPROBE_ENTRY(page_fault) ++ RING0_EC_FRAME ++ pushl $do_page_fault ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ .previous .text ++ ++#ifdef CONFIG_X86_MCE ++ENTRY(machine_check) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl machine_check_vector ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++#endif ++ ++#ifndef CONFIG_XEN ++ENTRY(spurious_interrupt_bug) ++ RING0_INT_FRAME ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_spurious_interrupt_bug ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++#endif /* !CONFIG_XEN */ ++ ++#ifdef CONFIG_STACK_UNWIND ++ENTRY(arch_unwind_init_running) ++ CFI_STARTPROC ++ movl 4(%esp), 
%edx ++ movl (%esp), %ecx ++ leal 4(%esp), %eax ++ movl %ebx, EBX(%edx) ++ xorl %ebx, %ebx ++ movl %ebx, ECX(%edx) ++ movl %ebx, EDX(%edx) ++ movl %esi, ESI(%edx) ++ movl %edi, EDI(%edx) ++ movl %ebp, EBP(%edx) ++ movl %ebx, EAX(%edx) ++ movl $__USER_DS, DS(%edx) ++ movl $__USER_DS, ES(%edx) ++ movl %ebx, ORIG_EAX(%edx) ++ movl %ecx, EIP(%edx) ++ movl 12(%esp), %ecx ++ movl $__KERNEL_CS, CS(%edx) ++ movl %ebx, EFLAGS(%edx) ++ movl %eax, OLDESP(%edx) ++ movl 8(%esp), %eax ++ movl %ecx, 8(%esp) ++ movl EBX(%edx), %ebx ++ movl $__KERNEL_DS, OLDSS(%edx) ++ jmpl *%eax ++ CFI_ENDPROC ++ENDPROC(arch_unwind_init_running) ++#endif ++ ++ENTRY(fixup_4gb_segment) ++ RING0_EC_FRAME ++ pushl $do_fixup_4gb_segment ++ CFI_ADJUST_CFA_OFFSET 4 ++ jmp error_code ++ CFI_ENDPROC ++ ++.section .rodata,"a" ++#include "syscall_table.S" ++ ++syscall_table_size=(.-sys_call_table) +Index: head-2008-11-25/arch/x86/kernel/fixup.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/fixup.c 2008-01-28 12:24:18.000000000 +0100 +@@ -0,0 +1,88 @@ ++/****************************************************************************** ++ * fixup.c ++ * ++ * Binary-rewriting of certain IA32 instructions, on notification by Xen. ++ * Used to avoid repeated slow emulation of common instructions used by the ++ * user-space TLS (Thread-Local Storage) libraries. ++ * ++ * **** NOTE **** ++ * Issues with the binary rewriting have caused it to be removed. Instead ++ * we rely on Xen's emulator to boot the kernel, and then print a banner ++ * message recommending that the user disables /lib/tls. ++ * ++ * Copyright (c) 2004, K A Fraser ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DP(_f, _args...) printk(KERN_ALERT " " _f "\n" , ## _args ) ++ ++fastcall void do_fixup_4gb_segment(struct pt_regs *regs, long error_code) ++{ ++ static unsigned long printed = 0; ++ char info[100]; ++ int i; ++ ++ /* Ignore statically-linked init. */ ++ if (current->tgid == 1) ++ return; ++ ++ VOID(HYPERVISOR_vm_assist(VMASST_CMD_disable, ++ VMASST_TYPE_4gb_segments_notify)); ++ ++ if (test_and_set_bit(0, &printed)) ++ return; ++ ++ sprintf(info, "%s (pid=%d)", current->comm, current->tgid); ++ ++ DP(""); ++ DP("***************************************************************"); ++ DP("***************************************************************"); ++ DP("** WARNING: Currently emulating unsupported memory accesses **"); ++ DP("** in /lib/tls glibc libraries. The emulation is **"); ++ DP("** slow. To ensure full performance you should **"); ++ DP("** install a 'xen-friendly' (nosegneg) version of **"); ++ DP("** the library, or disable tls support by executing **"); ++ DP("** the following as root: **"); ++ DP("** mv /lib/tls /lib/tls.disabled **"); ++ DP("** Offending process: %-38.38s **", info); ++ DP("***************************************************************"); ++ DP("***************************************************************"); ++ DP(""); ++ ++ for (i = 5; i > 0; i--) { ++ touch_softlockup_watchdog(); ++ printk("Pausing... 
%d", i); ++ mdelay(1000); ++ printk("\b\b\b\b\b\b\b\b\b\b\b\b"); ++ } ++ ++ printk("Continuing...\n\n"); ++} ++ ++static int __init fixup_init(void) ++{ ++ WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable, ++ VMASST_TYPE_4gb_segments_notify)); ++ return 0; ++} ++__initcall(fixup_init); +Index: head-2008-11-25/arch/x86/kernel/head_32-xen.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/head_32-xen.S 2007-06-12 13:12:48.000000000 +0200 +@@ -0,0 +1,207 @@ ++ ++ ++.text ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * References to members of the new_cpu_data structure. ++ */ ++ ++#define X86 new_cpu_data+CPUINFO_x86 ++#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor ++#define X86_MODEL new_cpu_data+CPUINFO_x86_model ++#define X86_MASK new_cpu_data+CPUINFO_x86_mask ++#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math ++#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level ++#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability ++#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id ++ ++#define VIRT_ENTRY_OFFSET 0x0 ++.org VIRT_ENTRY_OFFSET ++ENTRY(startup_32) ++ movl %esi,xen_start_info ++ cld ++ ++ /* Set up the stack pointer */ ++ movl $(init_thread_union+THREAD_SIZE),%esp ++ ++ /* get vendor info */ ++ xorl %eax,%eax # call CPUID with 0 -> return vendor ID ++ XEN_CPUID ++ movl %eax,X86_CPUID # save CPUID level ++ movl %ebx,X86_VENDOR_ID # lo 4 chars ++ movl %edx,X86_VENDOR_ID+4 # next 4 chars ++ movl %ecx,X86_VENDOR_ID+8 # last 4 chars ++ ++ movl $1,%eax # Use the CPUID instruction to get CPU type ++ XEN_CPUID ++ movb %al,%cl # save reg for future use ++ andb $0x0f,%ah # mask processor family ++ movb %ah,X86 ++ andb $0xf0,%al # mask model ++ shrb $4,%al ++ movb %al,X86_MODEL ++ andb $0x0f,%cl # mask mask revision ++ movb %cl,X86_MASK ++ movl %edx,X86_CAPABILITY ++ ++ 
movb $1,X86_HARD_MATH ++ ++ xorl %eax,%eax # Clear FS/GS and LDT ++ movl %eax,%fs ++ movl %eax,%gs ++ cld # gcc2 wants the direction flag cleared at all times ++ ++ pushl %eax # fake return address ++ jmp start_kernel ++ ++#define HYPERCALL_PAGE_OFFSET 0x1000 ++.org HYPERCALL_PAGE_OFFSET ++ENTRY(hypercall_page) ++ CFI_STARTPROC ++.skip 0x1000 ++ CFI_ENDPROC ++ ++/* ++ * Real beginning of normal "text" segment ++ */ ++ENTRY(stext) ++ENTRY(_stext) ++ ++/* ++ * BSS section ++ */ ++.section ".bss.page_aligned","w" ++ENTRY(empty_zero_page) ++ .fill 4096,1,0 ++ ++/* ++ * This starts the data section. ++ */ ++.data ++ ++/* ++ * The Global Descriptor Table contains 28 quadwords, per-CPU. ++ */ ++ .align L1_CACHE_BYTES ++ENTRY(cpu_gdt_table) ++ .quad 0x0000000000000000 /* NULL descriptor */ ++ .quad 0x0000000000000000 /* 0x0b reserved */ ++ .quad 0x0000000000000000 /* 0x13 reserved */ ++ .quad 0x0000000000000000 /* 0x1b reserved */ ++ .quad 0x0000000000000000 /* 0x20 unused */ ++ .quad 0x0000000000000000 /* 0x28 unused */ ++ .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ ++ .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ ++ .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ ++ .quad 0x0000000000000000 /* 0x4b reserved */ ++ .quad 0x0000000000000000 /* 0x53 reserved */ ++ .quad 0x0000000000000000 /* 0x5b reserved */ ++ ++ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ ++ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ ++ .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ ++ .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ ++ ++ .quad 0x0000000000000000 /* 0x80 TSS descriptor */ ++ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ ++ ++ /* ++ * Segments used for calling PnP BIOS have byte granularity. ++ * They code segments and data segments have fixed 64k limits, ++ * the transfer segment sizes are set at run time. 
++ */ ++ .quad 0x0000000000000000 /* 0x90 32-bit code */ ++ .quad 0x0000000000000000 /* 0x98 16-bit code */ ++ .quad 0x0000000000000000 /* 0xa0 16-bit data */ ++ .quad 0x0000000000000000 /* 0xa8 16-bit data */ ++ .quad 0x0000000000000000 /* 0xb0 16-bit data */ ++ ++ /* ++ * The APM segments have byte granularity and their bases ++ * are set at run time. All have 64k limits. ++ */ ++ .quad 0x0000000000000000 /* 0xb8 APM CS code */ ++ .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */ ++ .quad 0x0000000000000000 /* 0xc8 APM DS data */ ++ ++ .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */ ++ .quad 0x0000000000000000 /* 0xd8 - unused */ ++ .quad 0x0000000000000000 /* 0xe0 - unused */ ++ .quad 0x0000000000000000 /* 0xe8 - unused */ ++ .quad 0x0000000000000000 /* 0xf0 - unused */ ++ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ ++ ++#if CONFIG_XEN_COMPAT <= 0x030002 ++/* ++ * __xen_guest information ++ */ ++.macro utoa value ++ .if (\value) < 0 || (\value) >= 0x10 ++ utoa (((\value)>>4)&0x0fffffff) ++ .endif ++ .if ((\value) & 0xf) < 10 ++ .byte '0' + ((\value) & 0xf) ++ .else ++ .byte 'A' + ((\value) & 0xf) - 10 ++ .endif ++.endm ++ ++.section __xen_guest ++ .ascii "GUEST_OS=linux,GUEST_VER=2.6" ++ .ascii ",XEN_VER=xen-3.0" ++ .ascii ",VIRT_BASE=0x" ++ utoa __PAGE_OFFSET ++ .ascii ",ELF_PADDR_OFFSET=0x" ++ utoa __PAGE_OFFSET ++ .ascii ",VIRT_ENTRY=0x" ++ utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET) ++ .ascii ",HYPERCALL_PAGE=0x" ++ utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT) ++ .ascii ",FEATURES=writable_page_tables" ++ .ascii "|writable_descriptor_tables" ++ .ascii "|auto_translated_physmap" ++ .ascii "|pae_pgdir_above_4gb" ++ .ascii "|supervisor_mode_kernel" ++#ifdef CONFIG_X86_PAE ++ .ascii ",PAE=yes[extended-cr3]" ++#else ++ .ascii ",PAE=no" ++#endif ++ .ascii ",LOADER=generic" ++ .byte 0 ++#endif /* CONFIG_XEN_COMPAT <= 0x030002 */ ++ ++ ++ ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux") ++ 
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6") ++ ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") ++ ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, __PAGE_OFFSET) ++#if CONFIG_XEN_COMPAT <= 0x030002 ++ ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, __PAGE_OFFSET) ++#else ++ ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, 0) ++#endif ++ ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, startup_32) ++ ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page) ++ ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START) ++ ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel") ++#ifdef CONFIG_X86_PAE ++ ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes") ++ ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad, _PAGE_PRESENT,_PAGE_PRESENT) ++#else ++ ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no") ++ ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, _PAGE_PRESENT,_PAGE_PRESENT) ++#endif ++ ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic") ++ ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1) +Index: head-2008-11-25/arch/x86/kernel/init_task-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/init_task-xen.c 2007-06-12 13:12:48.000000000 +0200 +@@ -0,0 +1,51 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++static struct fs_struct init_fs = INIT_FS; ++static struct files_struct init_files = INIT_FILES; ++static struct signal_struct init_signals = INIT_SIGNALS(init_signals); ++static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); ++ ++#define swapper_pg_dir ((pgd_t *)NULL) ++struct mm_struct init_mm = INIT_MM(init_mm); ++#undef swapper_pg_dir ++ ++EXPORT_SYMBOL(init_mm); ++ ++/* ++ * Initial thread structure. 
++ * ++ * We need to make sure that this is THREAD_SIZE aligned due to the ++ * way process stacks are handled. This is done by having a special ++ * "init_task" linker map entry.. ++ */ ++union thread_union init_thread_union ++ __attribute__((__section__(".data.init_task"))) = ++ { INIT_THREAD_INFO(init_task) }; ++ ++/* ++ * Initial task structure. ++ * ++ * All other task structs will be allocated on slabs in fork.c ++ */ ++struct task_struct init_task = INIT_TASK(init_task); ++ ++EXPORT_SYMBOL(init_task); ++ ++#ifndef CONFIG_X86_NO_TSS ++/* ++ * per-CPU TSS segments. Threads are completely 'soft' on Linux, ++ * no more per-task TSS's. ++ */ ++DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; ++#endif ++ +Index: head-2008-11-25/arch/x86/kernel/io_apic_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/io_apic_32-xen.c 2008-11-25 12:22:34.000000000 +0100 +@@ -0,0 +1,2776 @@ ++/* ++ * Intel IO-APIC support for multi-Pentium hosts. ++ * ++ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo ++ * ++ * Many thanks to Stig Venaas for trying out countless experimental ++ * patches and reporting/debugging problems patiently! ++ * ++ * (c) 1999, Multiple IO-APIC support, developed by ++ * Ken-ichi Yaku and ++ * Hidemi Kishimoto , ++ * further tested and cleaned up by Zach Brown ++ * and Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. 
Tews ++ * for testing these extensively ++ * Paul Diefenbaugh : Added full ACPI support ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "io_ports.h" ++ ++#ifdef CONFIG_XEN ++ ++#include ++#include ++#include ++ ++/* Fake i8259 */ ++#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) ++#define disable_8259A_irq(_irq) ((void)0) ++#define i8259A_irq_pending(_irq) (0) ++ ++unsigned long io_apic_irqs; ++ ++static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg) ++{ ++ struct physdev_apic apic_op; ++ int ret; ++ ++ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; ++ apic_op.reg = reg; ++ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); ++ if (ret) ++ return ret; ++ return apic_op.value; ++} ++ ++static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) ++{ ++ struct physdev_apic apic_op; ++ ++ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; ++ apic_op.reg = reg; ++ apic_op.value = value; ++ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op)); ++} ++ ++#define io_apic_read(a,r) xen_io_apic_read(a,r) ++#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v) ++ ++#endif /* CONFIG_XEN */ ++ ++int (*ioapic_renumber_irq)(int ioapic, int irq); ++atomic_t irq_mis_count; ++ ++/* Where if anywhere is the i8259 connect in external int mode */ ++static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; ++ ++static DEFINE_SPINLOCK(ioapic_lock); ++static DEFINE_SPINLOCK(vector_lock); ++ ++int timer_over_8254 __initdata = 1; ++ ++/* ++ * Is the SiS APIC rmw bug present ? 
++ * -1 = don't know, 0 = no, 1 = yes ++ */ ++int sis_apic_bug = -1; ++ ++/* ++ * # of IRQ routing registers ++ */ ++int nr_ioapic_registers[MAX_IO_APICS]; ++ ++int disable_timer_pin_1 __initdata; ++ ++/* ++ * Rough estimation of how many shared IRQs there are, can ++ * be changed anytime. ++ */ ++#define MAX_PLUS_SHARED_IRQS NR_IRQS ++#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) ++ ++/* ++ * This is performance-critical, we want to do it O(1) ++ * ++ * the indexing order of this array favors 1:1 mappings ++ * between pins and IRQs. ++ */ ++ ++static struct irq_pin_list { ++ int apic, pin, next; ++} irq_2_pin[PIN_MAP_SIZE]; ++ ++int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; ++#ifdef CONFIG_PCI_MSI ++#define vector_to_irq(vector) \ ++ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) ++#else ++#define vector_to_irq(vector) (vector) ++#endif ++ ++/* ++ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are ++ * shared ISA-space IRQs, so we have to support them. We are super ++ * fast in the common case, and fast for shared ISA-space IRQs. ++ */ ++static void add_pin_to_irq(unsigned int irq, int apic, int pin) ++{ ++ static int first_free_entry = NR_IRQS; ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ ++ while (entry->next) ++ entry = irq_2_pin + entry->next; ++ ++ if (entry->pin != -1) { ++ entry->next = first_free_entry; ++ entry = irq_2_pin + entry->next; ++ if (++first_free_entry >= PIN_MAP_SIZE) ++ panic("io_apic.c: whoops"); ++ } ++ entry->apic = apic; ++ entry->pin = pin; ++} ++ ++#ifdef CONFIG_XEN ++#define clear_IO_APIC() ((void)0) ++#else ++/* ++ * Reroute an IRQ to a different pin. 
++ */ ++static void __init replace_pin_at_irq(unsigned int irq, ++ int oldapic, int oldpin, ++ int newapic, int newpin) ++{ ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ ++ while (1) { ++ if (entry->apic == oldapic && entry->pin == oldpin) { ++ entry->apic = newapic; ++ entry->pin = newpin; ++ } ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++} ++ ++static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) ++{ ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ unsigned int pin, reg; ++ ++ for (;;) { ++ pin = entry->pin; ++ if (pin == -1) ++ break; ++ reg = io_apic_read(entry->apic, 0x10 + pin*2); ++ reg &= ~disable; ++ reg |= enable; ++ io_apic_modify(entry->apic, 0x10 + pin*2, reg); ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++} ++ ++/* mask = 1 */ ++static void __mask_IO_APIC_irq (unsigned int irq) ++{ ++ __modify_IO_APIC_irq(irq, 0x00010000, 0); ++} ++ ++/* mask = 0 */ ++static void __unmask_IO_APIC_irq (unsigned int irq) ++{ ++ __modify_IO_APIC_irq(irq, 0, 0x00010000); ++} ++ ++/* mask = 1, trigger = 0 */ ++static void __mask_and_edge_IO_APIC_irq (unsigned int irq) ++{ ++ __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); ++} ++ ++/* mask = 0, trigger = 1 */ ++static void __unmask_and_level_IO_APIC_irq (unsigned int irq) ++{ ++ __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); ++} ++ ++static void mask_IO_APIC_irq (unsigned int irq) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __mask_IO_APIC_irq(irq); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void unmask_IO_APIC_irq (unsigned int irq) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __unmask_IO_APIC_irq(irq); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) ++{ ++ struct IO_APIC_route_entry entry; ++ unsigned long flags; ++ ++ /* Check delivery_mode to be 
sure we're not clearing an SMI pin */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); ++ *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ if (entry.delivery_mode == dest_SMI) ++ return; ++ ++ /* ++ * Disable it in the IO-APIC irq-routing table: ++ */ ++ memset(&entry, 0, sizeof(entry)); ++ entry.mask = 1; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); ++ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void clear_IO_APIC (void) ++{ ++ int apic, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ++ clear_IO_APIC_pin(apic, pin); ++} ++ ++#ifdef CONFIG_SMP ++static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) ++{ ++ unsigned long flags; ++ int pin; ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ unsigned int apicid_value; ++ cpumask_t tmp; ++ ++ cpus_and(tmp, cpumask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ tmp = TARGET_CPUS; ++ ++ cpus_and(cpumask, tmp, CPU_MASK_ALL); ++ ++ apicid_value = cpu_mask_to_apicid(cpumask); ++ /* Prepare to do the io_apic_write */ ++ apicid_value = apicid_value << 24; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ for (;;) { ++ pin = entry->pin; ++ if (pin == -1) ++ break; ++ io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++ set_irq_info(irq, cpumask); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++#if defined(CONFIG_IRQBALANCE) ++# include /* kernel_thread() */ ++# include /* kstat */ ++# include /* kmalloc() */ ++# include /* time_after() */ ++ ++#ifdef CONFIG_BALANCED_IRQ_DEBUG ++# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) ++# define Dprintk(x...) 
do { TDprintk(x); } while (0) ++# else ++# define TDprintk(x...) ++# define Dprintk(x...) ++# endif ++ ++#define IRQBALANCE_CHECK_ARCH -999 ++#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) ++#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) ++#define BALANCED_IRQ_MORE_DELTA (HZ/10) ++#define BALANCED_IRQ_LESS_DELTA (HZ) ++ ++static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH; ++static int physical_balance __read_mostly; ++static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; ++ ++static struct irq_cpu_info { ++ unsigned long * last_irq; ++ unsigned long * irq_delta; ++ unsigned long irq; ++} irq_cpu_data[NR_CPUS]; ++ ++#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) ++#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) ++#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) ++ ++#define IDLE_ENOUGH(cpu,now) \ ++ (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) ++ ++#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) ++ ++#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) ++ ++static cpumask_t balance_irq_affinity[NR_IRQS] = { ++ [0 ... 
NR_IRQS-1] = CPU_MASK_ALL ++}; ++ ++void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) ++{ ++ balance_irq_affinity[irq] = mask; ++} ++ ++static unsigned long move(int curr_cpu, cpumask_t allowed_mask, ++ unsigned long now, int direction) ++{ ++ int search_idle = 1; ++ int cpu = curr_cpu; ++ ++ goto inside; ++ ++ do { ++ if (unlikely(cpu == curr_cpu)) ++ search_idle = 0; ++inside: ++ if (direction == 1) { ++ cpu++; ++ if (cpu >= NR_CPUS) ++ cpu = 0; ++ } else { ++ cpu--; ++ if (cpu == -1) ++ cpu = NR_CPUS-1; ++ } ++ } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || ++ (search_idle && !IDLE_ENOUGH(cpu,now))); ++ ++ return cpu; ++} ++ ++static inline void balance_irq(int cpu, int irq) ++{ ++ unsigned long now = jiffies; ++ cpumask_t allowed_mask; ++ unsigned int new_cpu; ++ ++ if (irqbalance_disabled) ++ return; ++ ++ cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); ++ new_cpu = move(cpu, allowed_mask, now, 1); ++ if (cpu != new_cpu) { ++ set_pending_irq(irq, cpumask_of_cpu(new_cpu)); ++ } ++} ++ ++static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) ++{ ++ int i, j; ++ Dprintk("Rotating IRQs among CPUs.\n"); ++ for_each_online_cpu(i) { ++ for (j = 0; j < NR_IRQS; j++) { ++ if (!irq_desc[j].action) ++ continue; ++ /* Is it a significant load ? 
*/ ++ if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < ++ useful_load_threshold) ++ continue; ++ balance_irq(i, j); ++ } ++ } ++ balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, ++ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); ++ return; ++} ++ ++static void do_irq_balance(void) ++{ ++ int i, j; ++ unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); ++ unsigned long move_this_load = 0; ++ int max_loaded = 0, min_loaded = 0; ++ int load; ++ unsigned long useful_load_threshold = balanced_irq_interval + 10; ++ int selected_irq; ++ int tmp_loaded, first_attempt = 1; ++ unsigned long tmp_cpu_irq; ++ unsigned long imbalance = 0; ++ cpumask_t allowed_mask, target_cpu_mask, tmp; ++ ++ for_each_possible_cpu(i) { ++ int package_index; ++ CPU_IRQ(i) = 0; ++ if (!cpu_online(i)) ++ continue; ++ package_index = CPU_TO_PACKAGEINDEX(i); ++ for (j = 0; j < NR_IRQS; j++) { ++ unsigned long value_now, delta; ++ /* Is this an active IRQ? */ ++ if (!irq_desc[j].action) ++ continue; ++ if ( package_index == i ) ++ IRQ_DELTA(package_index,j) = 0; ++ /* Determine the total count per processor per IRQ */ ++ value_now = (unsigned long) kstat_cpu(i).irqs[j]; ++ ++ /* Determine the activity per processor per IRQ */ ++ delta = value_now - LAST_CPU_IRQ(i,j); ++ ++ /* Update last_cpu_irq[][] for the next time */ ++ LAST_CPU_IRQ(i,j) = value_now; ++ ++ /* Ignore IRQs whose rate is less than the clock */ ++ if (delta < useful_load_threshold) ++ continue; ++ /* update the load for the processor or package total */ ++ IRQ_DELTA(package_index,j) += delta; ++ ++ /* Keep track of the higher numbered sibling as well */ ++ if (i != package_index) ++ CPU_IRQ(i) += delta; ++ /* ++ * We have sibling A and sibling B in the package ++ * ++ * cpu_irq[A] = load for cpu A + load for cpu B ++ * cpu_irq[B] = load for cpu B ++ */ ++ CPU_IRQ(package_index) += delta; ++ } ++ } ++ /* Find the least loaded processor package */ ++ for_each_online_cpu(i) { ++ if (i != CPU_TO_PACKAGEINDEX(i)) ++ continue; ++ if 
(min_cpu_irq > CPU_IRQ(i)) { ++ min_cpu_irq = CPU_IRQ(i); ++ min_loaded = i; ++ } ++ } ++ max_cpu_irq = ULONG_MAX; ++ ++tryanothercpu: ++ /* Look for heaviest loaded processor. ++ * We may come back to get the next heaviest loaded processor. ++ * Skip processors with trivial loads. ++ */ ++ tmp_cpu_irq = 0; ++ tmp_loaded = -1; ++ for_each_online_cpu(i) { ++ if (i != CPU_TO_PACKAGEINDEX(i)) ++ continue; ++ if (max_cpu_irq <= CPU_IRQ(i)) ++ continue; ++ if (tmp_cpu_irq < CPU_IRQ(i)) { ++ tmp_cpu_irq = CPU_IRQ(i); ++ tmp_loaded = i; ++ } ++ } ++ ++ if (tmp_loaded == -1) { ++ /* In the case of small number of heavy interrupt sources, ++ * loading some of the cpus too much. We use Ingo's original ++ * approach to rotate them around. ++ */ ++ if (!first_attempt && imbalance >= useful_load_threshold) { ++ rotate_irqs_among_cpus(useful_load_threshold); ++ return; ++ } ++ goto not_worth_the_effort; ++ } ++ ++ first_attempt = 0; /* heaviest search */ ++ max_cpu_irq = tmp_cpu_irq; /* load */ ++ max_loaded = tmp_loaded; /* processor */ ++ imbalance = (max_cpu_irq - min_cpu_irq) / 2; ++ ++ Dprintk("max_loaded cpu = %d\n", max_loaded); ++ Dprintk("min_loaded cpu = %d\n", min_loaded); ++ Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq); ++ Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq); ++ Dprintk("load imbalance = %lu\n", imbalance); ++ ++ /* if imbalance is less than approx 10% of max load, then ++ * observe diminishing returns action. - quit ++ */ ++ if (imbalance < (max_cpu_irq >> 3)) { ++ Dprintk("Imbalance too trivial\n"); ++ goto not_worth_the_effort; ++ } ++ ++tryanotherirq: ++ /* if we select an IRQ to move that can't go where we want, then ++ * see if there is another one to try. ++ */ ++ move_this_load = 0; ++ selected_irq = -1; ++ for (j = 0; j < NR_IRQS; j++) { ++ /* Is this an active IRQ? 
*/ ++ if (!irq_desc[j].action) ++ continue; ++ if (imbalance <= IRQ_DELTA(max_loaded,j)) ++ continue; ++ /* Try to find the IRQ that is closest to the imbalance ++ * without going over. ++ */ ++ if (move_this_load < IRQ_DELTA(max_loaded,j)) { ++ move_this_load = IRQ_DELTA(max_loaded,j); ++ selected_irq = j; ++ } ++ } ++ if (selected_irq == -1) { ++ goto tryanothercpu; ++ } ++ ++ imbalance = move_this_load; ++ ++ /* For physical_balance case, we accumlated both load ++ * values in the one of the siblings cpu_irq[], ++ * to use the same code for physical and logical processors ++ * as much as possible. ++ * ++ * NOTE: the cpu_irq[] array holds the sum of the load for ++ * sibling A and sibling B in the slot for the lowest numbered ++ * sibling (A), _AND_ the load for sibling B in the slot for ++ * the higher numbered sibling. ++ * ++ * We seek the least loaded sibling by making the comparison ++ * (A+B)/2 vs B ++ */ ++ load = CPU_IRQ(min_loaded) >> 1; ++ for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { ++ if (load > CPU_IRQ(j)) { ++ /* This won't change cpu_sibling_map[min_loaded] */ ++ load = CPU_IRQ(j); ++ min_loaded = j; ++ } ++ } ++ ++ cpus_and(allowed_mask, ++ cpu_online_map, ++ balance_irq_affinity[selected_irq]); ++ target_cpu_mask = cpumask_of_cpu(min_loaded); ++ cpus_and(tmp, target_cpu_mask, allowed_mask); ++ ++ if (!cpus_empty(tmp)) { ++ ++ Dprintk("irq = %d moved to cpu = %d\n", ++ selected_irq, min_loaded); ++ /* mark for change destination */ ++ set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); ++ ++ /* Since we made a change, come back sooner to ++ * check for more variation. 
++ */ ++ balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, ++ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); ++ return; ++ } ++ goto tryanotherirq; ++ ++not_worth_the_effort: ++ /* ++ * if we did not find an IRQ to move, then adjust the time interval ++ * upward ++ */ ++ balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, ++ balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); ++ Dprintk("IRQ worth rotating not found\n"); ++ return; ++} ++ ++static int balanced_irq(void *unused) ++{ ++ int i; ++ unsigned long prev_balance_time = jiffies; ++ long time_remaining = balanced_irq_interval; ++ ++ daemonize("kirqd"); ++ ++ /* push everything to CPU 0 to give us a starting point. */ ++ for (i = 0 ; i < NR_IRQS ; i++) { ++ irq_desc[i].pending_mask = cpumask_of_cpu(0); ++ set_pending_irq(i, cpumask_of_cpu(0)); ++ } ++ ++ for ( ; ; ) { ++ time_remaining = schedule_timeout_interruptible(time_remaining); ++ try_to_freeze(); ++ if (time_after(jiffies, ++ prev_balance_time+balanced_irq_interval)) { ++ preempt_disable(); ++ do_irq_balance(); ++ prev_balance_time = jiffies; ++ time_remaining = balanced_irq_interval; ++ preempt_enable(); ++ } ++ } ++ return 0; ++} ++ ++static int __init balanced_irq_init(void) ++{ ++ int i; ++ struct cpuinfo_x86 *c; ++ cpumask_t tmp; ++ ++ cpus_shift_right(tmp, cpu_online_map, 2); ++ c = &boot_cpu_data; ++ /* When not overwritten by the command line ask subarchitecture. 
*/ ++ if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) ++ irqbalance_disabled = NO_BALANCE_IRQ; ++ if (irqbalance_disabled) ++ return 0; ++ ++ /* disable irqbalance completely if there is only one processor online */ ++ if (num_online_cpus() < 2) { ++ irqbalance_disabled = 1; ++ return 0; ++ } ++ /* ++ * Enable physical balance only if more than 1 physical processor ++ * is present ++ */ ++ if (smp_num_siblings > 1 && !cpus_empty(tmp)) ++ physical_balance = 1; ++ ++ for_each_online_cpu(i) { ++ irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); ++ irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); ++ if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { ++ printk(KERN_ERR "balanced_irq_init: out of memory"); ++ goto failed; ++ } ++ memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); ++ memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); ++ } ++ ++ printk(KERN_INFO "Starting balanced_irq\n"); ++ if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) ++ return 0; ++ else ++ printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); ++failed: ++ for_each_possible_cpu(i) { ++ kfree(irq_cpu_data[i].irq_delta); ++ irq_cpu_data[i].irq_delta = NULL; ++ kfree(irq_cpu_data[i].last_irq); ++ irq_cpu_data[i].last_irq = NULL; ++ } ++ return 0; ++} ++ ++int __init irqbalance_disable(char *str) ++{ ++ irqbalance_disabled = 1; ++ return 1; ++} ++ ++__setup("noirqbalance", irqbalance_disable); ++ ++late_initcall(balanced_irq_init); ++#endif /* CONFIG_IRQBALANCE */ ++#endif /* CONFIG_SMP */ ++#endif ++ ++#ifndef CONFIG_SMP ++void fastcall send_IPI_self(int vector) ++{ ++#ifndef CONFIG_XEN ++ unsigned int cfg; ++ ++ /* ++ * Wait for idle. ++ */ ++ apic_wait_icr_idle(); ++ cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; ++ /* ++ * Send the IPI. The write to APIC_ICR fires this off. 
++ */ ++ apic_write_around(APIC_ICR, cfg); ++#endif ++} ++#endif /* !CONFIG_SMP */ ++ ++ ++/* ++ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to ++ * specific CPU-side IRQs. ++ */ ++ ++#define MAX_PIRQS 8 ++static int pirq_entries [MAX_PIRQS]; ++static int pirqs_enabled; ++int skip_ioapic_setup; ++ ++static int __init ioapic_setup(char *str) ++{ ++ skip_ioapic_setup = 1; ++ return 1; ++} ++ ++__setup("noapic", ioapic_setup); ++ ++static int __init ioapic_pirq_setup(char *str) ++{ ++ int i, max; ++ int ints[MAX_PIRQS+1]; ++ ++ get_options(str, ARRAY_SIZE(ints), ints); ++ ++ for (i = 0; i < MAX_PIRQS; i++) ++ pirq_entries[i] = -1; ++ ++ pirqs_enabled = 1; ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "PIRQ redirection, working around broken MP-BIOS.\n"); ++ max = MAX_PIRQS; ++ if (ints[0] < MAX_PIRQS) ++ max = ints[0]; ++ ++ for (i = 0; i < max; i++) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); ++ /* ++ * PIRQs are mapped upside down, usually. ++ */ ++ pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; ++ } ++ return 1; ++} ++ ++__setup("pirq=", ioapic_pirq_setup); ++ ++/* ++ * Find the IRQ entry number of a certain pin. 
++ */ ++static int find_irq_entry(int apic, int pin, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].mpc_irqtype == type && ++ (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || ++ mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && ++ mp_irqs[i].mpc_dstirq == pin) ++ return i; ++ ++ return -1; ++} ++ ++/* ++ * Find the pin to which IRQ[irq] (ISA) is connected ++ */ ++static int __init find_isa_irq_pin(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].mpc_srcbus; ++ ++ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || ++ mp_bus_id_to_type[lbus] == MP_BUS_EISA || ++ mp_bus_id_to_type[lbus] == MP_BUS_MCA || ++ mp_bus_id_to_type[lbus] == MP_BUS_NEC98 ++ ) && ++ (mp_irqs[i].mpc_irqtype == type) && ++ (mp_irqs[i].mpc_srcbusirq == irq)) ++ ++ return mp_irqs[i].mpc_dstirq; ++ } ++ return -1; ++} ++ ++static int __init find_isa_irq_apic(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].mpc_srcbus; ++ ++ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || ++ mp_bus_id_to_type[lbus] == MP_BUS_EISA || ++ mp_bus_id_to_type[lbus] == MP_BUS_MCA || ++ mp_bus_id_to_type[lbus] == MP_BUS_NEC98 ++ ) && ++ (mp_irqs[i].mpc_irqtype == type) && ++ (mp_irqs[i].mpc_srcbusirq == irq)) ++ break; ++ } ++ if (i < mp_irq_entries) { ++ int apic; ++ for(apic = 0; apic < nr_ioapics; apic++) { ++ if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) ++ return apic; ++ } ++ } ++ ++ return -1; ++} ++ ++/* ++ * Find a specific PCI IRQ entry. 
++ * Not an __init, possibly needed by modules ++ */ ++static int pin_2_irq(int idx, int apic, int pin); ++ ++int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) ++{ ++ int apic, i, best_guess = -1; ++ ++ apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " ++ "slot:%d, pin:%d.\n", bus, slot, pin); ++ if (mp_bus_id_to_pci_bus[bus] == -1) { ++ printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); ++ return -1; ++ } ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].mpc_srcbus; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || ++ mp_irqs[i].mpc_dstapic == MP_APIC_ALL) ++ break; ++ ++ if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && ++ !mp_irqs[i].mpc_irqtype && ++ (bus == lbus) && ++ (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { ++ int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); ++ ++ if (!(apic || IO_APIC_IRQ(irq))) ++ continue; ++ ++ if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) ++ return irq; ++ /* ++ * Use the first all-but-pin matching entry as a ++ * best-guess fuzzy result for broken mptables. 
++ */ ++ if (best_guess < 0) ++ best_guess = irq; ++ } ++ } ++ return best_guess; ++} ++EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); ++ ++/* ++ * This function currently is only a helper for the i386 smp boot process where ++ * we need to reprogram the ioredtbls to cater for the cpus which have come online ++ * so mask in all cases should simply be TARGET_CPUS ++ */ ++#ifdef CONFIG_SMP ++#ifndef CONFIG_XEN ++void __init setup_ioapic_dest(void) ++{ ++ int pin, ioapic, irq, irq_entry; ++ ++ if (skip_ioapic_setup == 1) ++ return; ++ ++ for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { ++ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { ++ irq_entry = find_irq_entry(ioapic, pin, mp_INT); ++ if (irq_entry == -1) ++ continue; ++ irq = pin_2_irq(irq_entry, ioapic, pin); ++ set_ioapic_affinity_irq(irq, TARGET_CPUS); ++ } ++ ++ } ++} ++#endif /* !CONFIG_XEN */ ++#endif ++ ++/* ++ * EISA Edge/Level control register, ELCR ++ */ ++static int EISA_ELCR(unsigned int irq) ++{ ++ if (irq < 16) { ++ unsigned int port = 0x4d0 + (irq >> 3); ++ return (inb(port) >> (irq & 7)) & 1; ++ } ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "Broken MPtable reports ISA irq %d\n", irq); ++ return 0; ++} ++ ++/* EISA interrupts are always polarity zero and can be edge or level ++ * trigger depending on the ELCR value. If an interrupt is listed as ++ * EISA conforming in the MP table, that means its trigger type must ++ * be read in from the ELCR */ ++ ++#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) ++#define default_EISA_polarity(idx) (0) ++ ++/* ISA interrupts are always polarity zero edge triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_ISA_trigger(idx) (0) ++#define default_ISA_polarity(idx) (0) ++ ++/* PCI interrupts are always polarity one level triggered, ++ * when listed as conforming in the MP table. 
*/ ++ ++#define default_PCI_trigger(idx) (1) ++#define default_PCI_polarity(idx) (1) ++ ++/* MCA interrupts are always polarity zero level triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_MCA_trigger(idx) (1) ++#define default_MCA_polarity(idx) (0) ++ ++/* NEC98 interrupts are always polarity zero edge triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_NEC98_trigger(idx) (0) ++#define default_NEC98_polarity(idx) (0) ++ ++static int __init MPBIOS_polarity(int idx) ++{ ++ int bus = mp_irqs[idx].mpc_srcbus; ++ int polarity; ++ ++ /* ++ * Determine IRQ line polarity (high active or low active): ++ */ ++ switch (mp_irqs[idx].mpc_irqflag & 3) ++ { ++ case 0: /* conforms, ie. bus-type dependent polarity */ ++ { ++ switch (mp_bus_id_to_type[bus]) ++ { ++ case MP_BUS_ISA: /* ISA pin */ ++ { ++ polarity = default_ISA_polarity(idx); ++ break; ++ } ++ case MP_BUS_EISA: /* EISA pin */ ++ { ++ polarity = default_EISA_polarity(idx); ++ break; ++ } ++ case MP_BUS_PCI: /* PCI pin */ ++ { ++ polarity = default_PCI_polarity(idx); ++ break; ++ } ++ case MP_BUS_MCA: /* MCA pin */ ++ { ++ polarity = default_MCA_polarity(idx); ++ break; ++ } ++ case MP_BUS_NEC98: /* NEC 98 pin */ ++ { ++ polarity = default_NEC98_polarity(idx); ++ break; ++ } ++ default: ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ } ++ break; ++ } ++ case 1: /* high active */ ++ { ++ polarity = 0; ++ break; ++ } ++ case 2: /* reserved */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ case 3: /* low active */ ++ { ++ polarity = 1; ++ break; ++ } ++ default: /* invalid */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ } ++ return polarity; ++} ++ ++static int MPBIOS_trigger(int idx) ++{ ++ int bus = mp_irqs[idx].mpc_srcbus; ++ int trigger; ++ ++ /* ++ * Determine IRQ trigger mode (edge or level sensitive): ++ */ ++ switch 
((mp_irqs[idx].mpc_irqflag>>2) & 3) ++ { ++ case 0: /* conforms, ie. bus-type dependent */ ++ { ++ switch (mp_bus_id_to_type[bus]) ++ { ++ case MP_BUS_ISA: /* ISA pin */ ++ { ++ trigger = default_ISA_trigger(idx); ++ break; ++ } ++ case MP_BUS_EISA: /* EISA pin */ ++ { ++ trigger = default_EISA_trigger(idx); ++ break; ++ } ++ case MP_BUS_PCI: /* PCI pin */ ++ { ++ trigger = default_PCI_trigger(idx); ++ break; ++ } ++ case MP_BUS_MCA: /* MCA pin */ ++ { ++ trigger = default_MCA_trigger(idx); ++ break; ++ } ++ case MP_BUS_NEC98: /* NEC 98 pin */ ++ { ++ trigger = default_NEC98_trigger(idx); ++ break; ++ } ++ default: ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 1; ++ break; ++ } ++ } ++ break; ++ } ++ case 1: /* edge */ ++ { ++ trigger = 0; ++ break; ++ } ++ case 2: /* reserved */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 1; ++ break; ++ } ++ case 3: /* level */ ++ { ++ trigger = 1; ++ break; ++ } ++ default: /* invalid */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 0; ++ break; ++ } ++ } ++ return trigger; ++} ++ ++static inline int irq_polarity(int idx) ++{ ++ return MPBIOS_polarity(idx); ++} ++ ++static inline int irq_trigger(int idx) ++{ ++ return MPBIOS_trigger(idx); ++} ++ ++static int pin_2_irq(int idx, int apic, int pin) ++{ ++ int irq, i; ++ int bus = mp_irqs[idx].mpc_srcbus; ++ ++ /* ++ * Debugging check, we are in big trouble if this message pops up! 
++ */ ++ if (mp_irqs[idx].mpc_dstirq != pin) ++ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); ++ ++ switch (mp_bus_id_to_type[bus]) ++ { ++ case MP_BUS_ISA: /* ISA pin */ ++ case MP_BUS_EISA: ++ case MP_BUS_MCA: ++ case MP_BUS_NEC98: ++ { ++ irq = mp_irqs[idx].mpc_srcbusirq; ++ break; ++ } ++ case MP_BUS_PCI: /* PCI pin */ ++ { ++ /* ++ * PCI IRQs are mapped in order ++ */ ++ i = irq = 0; ++ while (i < apic) ++ irq += nr_ioapic_registers[i++]; ++ irq += pin; ++ ++ /* ++ * For MPS mode, so far only needed by ES7000 platform ++ */ ++ if (ioapic_renumber_irq) ++ irq = ioapic_renumber_irq(apic, irq); ++ ++ break; ++ } ++ default: ++ { ++ printk(KERN_ERR "unknown bus type %d.\n",bus); ++ irq = 0; ++ break; ++ } ++ } ++ ++ /* ++ * PCI IRQ command line redirection. Yes, limits are hardcoded. ++ */ ++ if ((pin >= 16) && (pin <= 23)) { ++ if (pirq_entries[pin-16] != -1) { ++ if (!pirq_entries[pin-16]) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "disabling PIRQ%d\n", pin-16); ++ } else { ++ irq = pirq_entries[pin-16]; ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "using PIRQ%d -> IRQ %d\n", ++ pin-16, irq); ++ } ++ } ++ } ++ return irq; ++} ++ ++static inline int IO_APIC_irq_trigger(int irq) ++{ ++ int apic, idx, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ idx = find_irq_entry(apic,pin,mp_INT); ++ if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) ++ return irq_trigger(idx); ++ } ++ } ++ /* ++ * nonexistent IRQs are edge default ++ */ ++ return 0; ++} ++ ++/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. 
*/ ++u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ ++ ++int assign_irq_vector(int irq) ++{ ++ unsigned long flags; ++ int vector; ++ struct physdev_irq irq_op; ++ ++ BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); ++ ++ if (irq < PIRQ_BASE || irq - PIRQ_BASE > NR_PIRQS) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&vector_lock, flags); ++ ++ if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { ++ spin_unlock_irqrestore(&vector_lock, flags); ++ return IO_APIC_VECTOR(irq); ++ } ++ ++ irq_op.irq = irq; ++ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { ++ spin_unlock_irqrestore(&vector_lock, flags); ++ return -ENOSPC; ++ } ++ ++ vector = irq_op.vector; ++ vector_irq[vector] = irq; ++ if (irq != AUTO_ASSIGN) ++ IO_APIC_VECTOR(irq) = vector; ++ ++ spin_unlock_irqrestore(&vector_lock, flags); ++ ++ return vector; ++} ++ ++#ifndef CONFIG_XEN ++static struct hw_interrupt_type ioapic_level_type; ++static struct hw_interrupt_type ioapic_edge_type; ++ ++#define IOAPIC_AUTO -1 ++#define IOAPIC_EDGE 0 ++#define IOAPIC_LEVEL 1 ++ ++static void ioapic_register_intr(int irq, int vector, unsigned long trigger) ++{ ++ unsigned idx; ++ ++ idx = use_pci_vector() && !platform_legacy_irq(irq) ? 
vector : irq; ++ ++ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || ++ trigger == IOAPIC_LEVEL) ++ irq_desc[idx].chip = &ioapic_level_type; ++ else ++ irq_desc[idx].chip = &ioapic_edge_type; ++ set_intr_gate(vector, interrupt[idx]); ++} ++#else ++#define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq) ++#endif ++ ++static void __init setup_IO_APIC_irqs(void) ++{ ++ struct IO_APIC_route_entry entry; ++ int apic, pin, idx, irq, first_notcon = 1, vector; ++ unsigned long flags; ++ ++ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ ++ /* ++ * add it to the IO-APIC irq-routing table: ++ */ ++ memset(&entry,0,sizeof(entry)); ++ ++ entry.delivery_mode = INT_DELIVERY_MODE; ++ entry.dest_mode = INT_DEST_MODE; ++ entry.mask = 0; /* enable IRQ */ ++ entry.dest.logical.logical_dest = ++ cpu_mask_to_apicid(TARGET_CPUS); ++ ++ idx = find_irq_entry(apic,pin,mp_INT); ++ if (idx == -1) { ++ if (first_notcon) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ " IO-APIC (apicid-pin) %d-%d", ++ mp_ioapics[apic].mpc_apicid, ++ pin); ++ first_notcon = 0; ++ } else ++ apic_printk(APIC_VERBOSE, ", %d-%d", ++ mp_ioapics[apic].mpc_apicid, pin); ++ continue; ++ } ++ ++ entry.trigger = irq_trigger(idx); ++ entry.polarity = irq_polarity(idx); ++ ++ if (irq_trigger(idx)) { ++ entry.trigger = 1; ++ entry.mask = 1; ++ } ++ ++ irq = pin_2_irq(idx, apic, pin); ++ /* ++ * skip adding the timer int on secondary nodes, which causes ++ * a small but painful rift in the time-space continuum ++ */ ++ if (multi_timer_check(apic, irq)) ++ continue; ++ else ++ add_pin_to_irq(irq, apic, pin); ++ ++ if (/*!apic &&*/ !IO_APIC_IRQ(irq)) ++ continue; ++ ++ if (IO_APIC_IRQ(irq)) { ++ vector = assign_irq_vector(irq); ++ entry.vector = vector; ++ ioapic_register_intr(irq, vector, IOAPIC_AUTO); ++ ++ if (!apic && (irq < 16)) ++ disable_8259A_irq(irq); ++ } ++ 
spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); ++ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); ++ set_native_irq_info(irq, TARGET_CPUS); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ } ++ } ++ ++ if (!first_notcon) ++ apic_printk(APIC_VERBOSE, " not connected.\n"); ++} ++ ++/* ++ * Set up the 8259A-master output pin: ++ */ ++#ifndef CONFIG_XEN ++static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) ++{ ++ struct IO_APIC_route_entry entry; ++ unsigned long flags; ++ ++ memset(&entry,0,sizeof(entry)); ++ ++ disable_8259A_irq(0); ++ ++ /* mask LVT0 */ ++ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); ++ ++ /* ++ * We use logical delivery to get the timer IRQ ++ * to the first CPU. ++ */ ++ entry.dest_mode = INT_DEST_MODE; ++ entry.mask = 0; /* unmask IRQ now */ ++ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); ++ entry.delivery_mode = INT_DELIVERY_MODE; ++ entry.polarity = 0; ++ entry.trigger = 0; ++ entry.vector = vector; ++ ++ /* ++ * The timer IRQ doesn't have to know that behind the ++ * scene we have a 8259A-master in AEOI mode ... 
++ */ ++ irq_desc[0].chip = &ioapic_edge_type; ++ ++ /* ++ * Add it to the IO-APIC irq-routing table: ++ */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); ++ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ enable_8259A_irq(0); ++} ++ ++static inline void UNEXPECTED_IO_APIC(void) ++{ ++} ++ ++void __init print_IO_APIC(void) ++{ ++ int apic, i; ++ union IO_APIC_reg_00 reg_00; ++ union IO_APIC_reg_01 reg_01; ++ union IO_APIC_reg_02 reg_02; ++ union IO_APIC_reg_03 reg_03; ++ unsigned long flags; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); ++ for (i = 0; i < nr_ioapics; i++) ++ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", ++ mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); ++ ++ /* ++ * We are a bit conservative about what we expect. We have to ++ * know about every hardware change ASAP. ++ */ ++ printk(KERN_INFO "testing the IO APIC.......................\n"); ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic, 0); ++ reg_01.raw = io_apic_read(apic, 1); ++ if (reg_01.bits.version >= 0x10) ++ reg_02.raw = io_apic_read(apic, 2); ++ if (reg_01.bits.version >= 0x20) ++ reg_03.raw = io_apic_read(apic, 3); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); ++ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); ++ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); ++ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); ++ printk(KERN_DEBUG "....... 
: LTS : %X\n", reg_00.bits.LTS); ++ if (reg_00.bits.ID >= get_physical_broadcast()) ++ UNEXPECTED_IO_APIC(); ++ if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) ++ UNEXPECTED_IO_APIC(); ++ ++ printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); ++ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); ++ if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ ++ (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ ++ (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ ++ (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ ++ (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ ++ (reg_01.bits.entries != 0x2E) && ++ (reg_01.bits.entries != 0x3F) ++ ) ++ UNEXPECTED_IO_APIC(); ++ ++ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); ++ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); ++ if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ ++ (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ ++ (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ ++ (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ ++ (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ ++ ) ++ UNEXPECTED_IO_APIC(); ++ if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) ++ UNEXPECTED_IO_APIC(); ++ ++ /* ++ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, ++ * but the value of reg_02 is read as the previous read register ++ * value, so ignore it if reg_02 == reg_01. ++ */ ++ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { ++ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); ++ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); ++ if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) ++ UNEXPECTED_IO_APIC(); ++ } ++ ++ /* ++ * Some Intel chipsets with IO APIC VERSION of 0x2? 
don't have reg_02 ++ * or reg_03, but the value of reg_0[23] is read as the previous read ++ * register value, so ignore it if reg_03 == reg_0[12]. ++ */ ++ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && ++ reg_03.raw != reg_01.raw) { ++ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); ++ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); ++ if (reg_03.bits.__reserved_1) ++ UNEXPECTED_IO_APIC(); ++ } ++ ++ printk(KERN_DEBUG ".... IRQ redirection table:\n"); ++ ++ printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" ++ " Stat Dest Deli Vect: \n"); ++ ++ for (i = 0; i <= reg_01.bits.entries; i++) { ++ struct IO_APIC_route_entry entry; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); ++ *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ printk(KERN_DEBUG " %02x %03X %02X ", ++ i, ++ entry.dest.logical.logical_dest, ++ entry.dest.physical.physical_dest ++ ); ++ ++ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", ++ entry.mask, ++ entry.trigger, ++ entry.irr, ++ entry.polarity, ++ entry.delivery_status, ++ entry.dest_mode, ++ entry.delivery_mode, ++ entry.vector ++ ); ++ } ++ } ++ if (use_pci_vector()) ++ printk(KERN_INFO "Using vector-based indexing\n"); ++ printk(KERN_DEBUG "IRQ to pin mappings:\n"); ++ for (i = 0; i < NR_IRQS; i++) { ++ struct irq_pin_list *entry = irq_2_pin + i; ++ if (entry->pin < 0) ++ continue; ++ if (use_pci_vector() && !platform_legacy_irq(i)) ++ printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); ++ else ++ printk(KERN_DEBUG "IRQ%d ", i); ++ for (;;) { ++ printk("-> %d:%d", entry->apic, entry->pin); ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++ printk("\n"); ++ } ++ ++ printk(KERN_INFO ".................................... 
done.\n"); ++ ++ return; ++} ++ ++static void print_APIC_bitfield (int base) ++{ ++ unsigned int v; ++ int i, j; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); ++ for (i = 0; i < 8; i++) { ++ v = apic_read(base + i*0x10); ++ for (j = 0; j < 32; j++) { ++ if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ v = apic_read(APIC_ESR); ++ printk(KERN_DEBUG "... APIC ESR: %08x\n", v); ++ } ++ ++ v = apic_read(APIC_ICR); ++ printk(KERN_DEBUG "... APIC ICR: %08x\n", v); ++ v = apic_read(APIC_ICR2); ++ printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); ++ ++ v = apic_read(APIC_LVTT); ++ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); ++ ++ if (maxlvt > 3) { /* PC is LVT#4. */ ++ v = apic_read(APIC_LVTPC); ++ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); ++ } ++ v = apic_read(APIC_LVT0); ++ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); ++ v = apic_read(APIC_LVT1); ++ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); ++ ++ if (maxlvt > 2) { /* ERR is LVT#3. */ ++ v = apic_read(APIC_LVTERR); ++ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); ++ } ++ ++ v = apic_read(APIC_TMICT); ++ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); ++ v = apic_read(APIC_TMCCT); ++ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); ++ v = apic_read(APIC_TDCR); ++ printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); ++ printk("\n"); ++} ++ ++void print_all_local_APICs (void) ++{ ++ on_each_cpu(print_local_APIC, NULL, 1, 1); ++} ++ ++void /*__init*/ print_PIC(void) ++{ ++ unsigned int v; ++ unsigned long flags; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "\nprinting PIC contents\n"); ++ ++ spin_lock_irqsave(&i8259A_lock, flags); ++ ++ v = inb(0xa1) << 8 | inb(0x21); ++ printk(KERN_DEBUG "... PIC IMR: %04x\n", v); ++ ++ v = inb(0xa0) << 8 | inb(0x20); ++ printk(KERN_DEBUG "... 
PIC IRR: %04x\n", v); ++ ++ outb(0x0b,0xa0); ++ outb(0x0b,0x20); ++ v = inb(0xa0) << 8 | inb(0x20); ++ outb(0x0a,0xa0); ++ outb(0x0a,0x20); ++ ++ spin_unlock_irqrestore(&i8259A_lock, flags); ++ ++ printk(KERN_DEBUG "... PIC ISR: %04x\n", v); ++ ++ v = inb(0x4d1) << 8 | inb(0x4d0); ++ printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); ++} ++#endif /* !CONFIG_XEN */ ++ ++static void __init enable_IO_APIC(void) ++{ ++ union IO_APIC_reg_01 reg_01; ++ int i8259_apic, i8259_pin; ++ int i, apic; ++ unsigned long flags; ++ ++ for (i = 0; i < PIN_MAP_SIZE; i++) { ++ irq_2_pin[i].pin = -1; ++ irq_2_pin[i].next = 0; ++ } ++ if (!pirqs_enabled) ++ for (i = 0; i < MAX_PIRQS; i++) ++ pirq_entries[i] = -1; ++ ++ /* ++ * The number of IO-APIC IRQ registers (== #pins): ++ */ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(apic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ nr_ioapic_registers[apic] = reg_01.bits.entries+1; ++ } ++ for(apic = 0; apic < nr_ioapics; apic++) { ++ int pin; ++ /* See if any of the pins is in ExtINT mode */ ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ struct IO_APIC_route_entry entry; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); ++ *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ ++ /* If the interrupt line is enabled and in ExtInt mode ++ * I have found the pin where the i8259 is connected. ++ */ ++ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { ++ ioapic_i8259.apic = apic; ++ ioapic_i8259.pin = pin; ++ goto found_i8259; ++ } ++ } ++ } ++ found_i8259: ++ /* Look to see what if the MP table has reported the ExtINT */ ++ /* If we could not find the appropriate pin by looking at the ioapic ++ * the i8259 probably is not connected the ioapic but give the ++ * mptable a chance anyway. 
++ */ ++ i8259_pin = find_isa_irq_pin(0, mp_ExtINT); ++ i8259_apic = find_isa_irq_apic(0, mp_ExtINT); ++ /* Trust the MP table if nothing is setup in the hardware */ ++ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { ++ printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); ++ ioapic_i8259.pin = i8259_pin; ++ ioapic_i8259.apic = i8259_apic; ++ } ++ /* Complain if the MP table and the hardware disagree */ ++ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && ++ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) ++ { ++ printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); ++ } ++ ++ /* ++ * Do not trust the IO-APIC being empty at bootup ++ */ ++ clear_IO_APIC(); ++} ++ ++/* ++ * Not an __init, needed by the reboot code ++ */ ++void disable_IO_APIC(void) ++{ ++ /* ++ * Clear the IO-APIC before rebooting: ++ */ ++ clear_IO_APIC(); ++ ++#ifndef CONFIG_XEN ++ /* ++ * If the i8259 is routed through an IOAPIC ++ * Put that IOAPIC in virtual wire mode ++ * so legacy interrupts can be delivered. 
++ */ ++ if (ioapic_i8259.pin != -1) { ++ struct IO_APIC_route_entry entry; ++ unsigned long flags; ++ ++ memset(&entry, 0, sizeof(entry)); ++ entry.mask = 0; /* Enabled */ ++ entry.trigger = 0; /* Edge */ ++ entry.irr = 0; ++ entry.polarity = 0; /* High */ ++ entry.delivery_status = 0; ++ entry.dest_mode = 0; /* Physical */ ++ entry.delivery_mode = dest_ExtINT; /* ExtInt */ ++ entry.vector = 0; ++ entry.dest.physical.physical_dest = ++ GET_APIC_ID(apic_read(APIC_ID)); ++ ++ /* ++ * Add it to the IO-APIC irq-routing table: ++ */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, ++ *(((int *)&entry)+1)); ++ io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, ++ *(((int *)&entry)+0)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ } ++ disconnect_bsp_APIC(ioapic_i8259.pin != -1); ++#endif ++} ++ ++/* ++ * function to set the IO-APIC physical IDs based on the ++ * values stored in the MPC table. ++ * ++ * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 ++ */ ++ ++#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ) ++static void __init setup_ioapic_ids_from_mpc(void) ++{ ++ union IO_APIC_reg_00 reg_00; ++ physid_mask_t phys_id_present_map; ++ int apic; ++ int i; ++ unsigned char old_id; ++ unsigned long flags; ++ ++ /* ++ * Don't check I/O APIC IDs for xAPIC systems. They have ++ * no meaning without the serial APIC bus. ++ */ ++ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) ++ return; ++ /* ++ * This is broken; anything with a real cpu count has to ++ * circumvent this idiocy regardless. ++ */ ++ phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); ++ ++ /* ++ * Set the IOAPIC ID to the value stored in the MPC table. 
++ */ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ ++ /* Read the register 0 value */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ old_id = mp_ioapics[apic].mpc_apicid; ++ ++ if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", ++ apic, mp_ioapics[apic].mpc_apicid); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ reg_00.bits.ID); ++ mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; ++ } ++ ++ /* ++ * Sanity check, is the ID really free? Every APIC in a ++ * system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. ++ */ ++ if (check_apicid_used(phys_id_present_map, ++ mp_ioapics[apic].mpc_apicid)) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", ++ apic, mp_ioapics[apic].mpc_apicid); ++ for (i = 0; i < get_physical_broadcast(); i++) ++ if (!physid_isset(i, phys_id_present_map)) ++ break; ++ if (i >= get_physical_broadcast()) ++ panic("Max APIC ID exceeded!\n"); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ i); ++ physid_set(i, phys_id_present_map); ++ mp_ioapics[apic].mpc_apicid = i; ++ } else { ++ physid_mask_t tmp; ++ tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); ++ apic_printk(APIC_VERBOSE, "Setting %d in the " ++ "phys_id_present_map\n", ++ mp_ioapics[apic].mpc_apicid); ++ physids_or(phys_id_present_map, phys_id_present_map, tmp); ++ } ++ ++ ++ /* ++ * We need to adjust the IRQ routing table ++ * if the ID changed. ++ */ ++ if (old_id != mp_ioapics[apic].mpc_apicid) ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].mpc_dstapic == old_id) ++ mp_irqs[i].mpc_dstapic ++ = mp_ioapics[apic].mpc_apicid; ++ ++ /* ++ * Read the right value from the MPC table and ++ * write it into the ID register. 
++ */ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "...changing IO-APIC physical APIC ID to %d ...", ++ mp_ioapics[apic].mpc_apicid); ++ ++ reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0, reg_00.raw); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* ++ * Sanity check ++ */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) ++ printk("could not set ID!\n"); ++ else ++ apic_printk(APIC_VERBOSE, " ok.\n"); ++ } ++} ++#else ++static void __init setup_ioapic_ids_from_mpc(void) { } ++#endif ++ ++#ifndef CONFIG_XEN ++/* ++ * There is a nasty bug in some older SMP boards, their mptable lies ++ * about the timer IRQ. We do the following to work around the situation: ++ * ++ * - timer IRQ defaults to IO-APIC IRQ ++ * - if this function detects that timer IRQs are defunct, then we fall ++ * back to ISA timer IRQs ++ */ ++static int __init timer_irq_works(void) ++{ ++ unsigned long t1 = jiffies; ++ ++ local_irq_enable(); ++ /* Let ten ticks pass... */ ++ mdelay((10 * 1000) / HZ); ++ ++ /* ++ * Expect a few ticks at least, to be sure some possible ++ * glue logic does not lock up after one or two first ++ * ticks in a non-ExtINT mode. Also the local APIC ++ * might have cached one ExtINT interrupt. Finally, at ++ * least one tick may be lost due to delays. ++ */ ++ if (jiffies - t1 > 4) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * In the SMP+IOAPIC case it might happen that there are an unspecified ++ * number of pending IRQ events unhandled. These cases are very rare, ++ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much ++ * better to do it this way as thus we do not have to be aware of ++ * 'pending' interrupts in the IRQ path, except at this point. 
++ */ ++/* ++ * Edge triggered needs to resend any interrupt ++ * that was delayed but this is now handled in the device ++ * independent code. ++ */ ++ ++/* ++ * Starting up a edge-triggered IO-APIC interrupt is ++ * nasty - we need to make sure that we get the edge. ++ * If it is already asserted for some reason, we need ++ * return 1 to indicate that is was pending. ++ * ++ * This is not complete - we should be able to fake ++ * an edge even if it isn't on the 8259A... ++ */ ++static unsigned int startup_edge_ioapic_irq(unsigned int irq) ++{ ++ int was_pending = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ if (irq < 16) { ++ disable_8259A_irq(irq); ++ if (i8259A_irq_pending(irq)) ++ was_pending = 1; ++ } ++ __unmask_IO_APIC_irq(irq); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return was_pending; ++} ++ ++/* ++ * Once we have recorded IRQ_PENDING already, we can mask the ++ * interrupt for real. This prevents IRQ storms from unhandled ++ * devices. ++ */ ++static void ack_edge_ioapic_irq(unsigned int irq) ++{ ++ move_irq(irq); ++ if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) ++ == (IRQ_PENDING | IRQ_DISABLED)) ++ mask_IO_APIC_irq(irq); ++ ack_APIC_irq(); ++} ++ ++/* ++ * Level triggered interrupts can just be masked, ++ * and shutting down and starting up the interrupt ++ * is the same as enabling and disabling them -- except ++ * with a startup need to return a "was pending" value. ++ * ++ * Level triggered interrupts are special because we ++ * do not touch any IO-APIC register while handling ++ * them. We ack the APIC in the end-IRQ handler, not ++ * in the start-IRQ-handler. Protection against reentrance ++ * from the same interrupt is still provided, both by the ++ * generic IRQ layer and by the fact that an unacked local ++ * APIC does not accept IRQs. 
++ */ ++static unsigned int startup_level_ioapic_irq (unsigned int irq) ++{ ++ unmask_IO_APIC_irq(irq); ++ ++ return 0; /* don't check for pending */ ++} ++ ++static void end_level_ioapic_irq (unsigned int irq) ++{ ++ unsigned long v; ++ int i; ++ ++ move_irq(irq); ++/* ++ * It appears there is an erratum which affects at least version 0x11 ++ * of I/O APIC (that's the 82093AA and cores integrated into various ++ * chipsets). Under certain conditions a level-triggered interrupt is ++ * erroneously delivered as edge-triggered one but the respective IRR ++ * bit gets set nevertheless. As a result the I/O unit expects an EOI ++ * message but it will never arrive and further interrupts are blocked ++ * from the source. The exact reason is so far unknown, but the ++ * phenomenon was observed when two consecutive interrupt requests ++ * from a given source get delivered to the same CPU and the source is ++ * temporarily disabled in between. ++ * ++ * A workaround is to simulate an EOI message manually. We achieve it ++ * by setting the trigger mode to edge and then to level when the edge ++ * trigger mode gets detected in the TMR of a local APIC for a ++ * level-triggered interrupt. We mask the source for the time of the ++ * operation to prevent an edge-triggered interrupt escaping meanwhile. ++ * The idea is from Manfred Spraul. 
--macro ++ */ ++ i = IO_APIC_VECTOR(irq); ++ ++ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); ++ ++ ack_APIC_irq(); ++ ++ if (!(v & (1 << (i & 0x1f)))) { ++ atomic_inc(&irq_mis_count); ++ spin_lock(&ioapic_lock); ++ __mask_and_edge_IO_APIC_irq(irq); ++ __unmask_and_level_IO_APIC_irq(irq); ++ spin_unlock(&ioapic_lock); ++ } ++} ++ ++#ifdef CONFIG_PCI_MSI ++static unsigned int startup_edge_ioapic_vector(unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ return startup_edge_ioapic_irq(irq); ++} ++ ++static void ack_edge_ioapic_vector(unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ move_native_irq(vector); ++ ack_edge_ioapic_irq(irq); ++} ++ ++static unsigned int startup_level_ioapic_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ return startup_level_ioapic_irq (irq); ++} ++ ++static void end_level_ioapic_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ move_native_irq(vector); ++ end_level_ioapic_irq(irq); ++} ++ ++static void mask_IO_APIC_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ mask_IO_APIC_irq(irq); ++} ++ ++static void unmask_IO_APIC_vector (unsigned int vector) ++{ ++ int irq = vector_to_irq(vector); ++ ++ unmask_IO_APIC_irq(irq); ++} ++ ++#ifdef CONFIG_SMP ++static void set_ioapic_affinity_vector (unsigned int vector, ++ cpumask_t cpu_mask) ++{ ++ int irq = vector_to_irq(vector); ++ ++ set_native_irq_info(vector, cpu_mask); ++ set_ioapic_affinity_irq(irq, cpu_mask); ++} ++#endif ++#endif ++ ++static int ioapic_retrigger(unsigned int irq) ++{ ++ send_IPI_self(IO_APIC_VECTOR(irq)); ++ ++ return 1; ++} ++ ++/* ++ * Level and edge triggered IO-APIC interrupts need different handling, ++ * so we use two separate IRQ descriptors. Edge triggered IRQs can be ++ * handled with the level-triggered descriptor, but that one has slightly ++ * more overhead. 
Level-triggered interrupts cannot be handled with the ++ * edge-triggered handler, without risking IRQ storms and other ugly ++ * races. ++ */ ++static struct hw_interrupt_type ioapic_edge_type __read_mostly = { ++ .typename = "IO-APIC-edge", ++ .startup = startup_edge_ioapic, ++ .shutdown = shutdown_edge_ioapic, ++ .enable = enable_edge_ioapic, ++ .disable = disable_edge_ioapic, ++ .ack = ack_edge_ioapic, ++ .end = end_edge_ioapic, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ioapic_affinity, ++#endif ++ .retrigger = ioapic_retrigger, ++}; ++ ++static struct hw_interrupt_type ioapic_level_type __read_mostly = { ++ .typename = "IO-APIC-level", ++ .startup = startup_level_ioapic, ++ .shutdown = shutdown_level_ioapic, ++ .enable = enable_level_ioapic, ++ .disable = disable_level_ioapic, ++ .ack = mask_and_ack_level_ioapic, ++ .end = end_level_ioapic, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ioapic_affinity, ++#endif ++ .retrigger = ioapic_retrigger, ++}; ++#endif /* !CONFIG_XEN */ ++ ++static inline void init_IO_APIC_traps(void) ++{ ++ int irq; ++ ++ /* ++ * NOTE! The local APIC isn't very good at handling ++ * multiple interrupts at the same interrupt level. ++ * As the interrupt level is determined by taking the ++ * vector number and shifting that right by 4, we ++ * want to spread these out a bit so that they don't ++ * all fall in the same interrupt level. ++ * ++ * Also, we've got to be careful not to trash gate ++ * 0x80, because int 0x80 is hm, kind of importantish. ;) ++ */ ++ for (irq = 0; irq < NR_IRQS ; irq++) { ++ int tmp = irq; ++ if (use_pci_vector()) { ++ if (!platform_legacy_irq(tmp)) ++ if ((tmp = vector_to_irq(tmp)) == -1) ++ continue; ++ } ++ if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { ++ /* ++ * Hmm.. We don't have an entry for this, ++ * so default to an old-fashioned 8259 ++ * interrupt if we can.. ++ */ ++ if (irq < 16) ++ make_8259A_irq(irq); ++#ifndef CONFIG_XEN ++ else ++ /* Strange. Oh, well.. 
*/ ++ irq_desc[irq].chip = &no_irq_type; ++#endif ++ } ++ } ++} ++ ++#ifndef CONFIG_XEN ++static void enable_lapic_irq (unsigned int irq) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); ++} ++ ++static void disable_lapic_irq (unsigned int irq) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); ++} ++ ++static void ack_lapic_irq (unsigned int irq) ++{ ++ ack_APIC_irq(); ++} ++ ++static void end_lapic_irq (unsigned int i) { /* nothing */ } ++ ++static struct hw_interrupt_type lapic_irq_type __read_mostly = { ++ .typename = "local-APIC-edge", ++ .startup = NULL, /* startup_irq() not used for IRQ0 */ ++ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ ++ .enable = enable_lapic_irq, ++ .disable = disable_lapic_irq, ++ .ack = ack_lapic_irq, ++ .end = end_lapic_irq ++}; ++ ++static void setup_nmi (void) ++{ ++ /* ++ * Dirty trick to enable the NMI watchdog ... ++ * We put the 8259A master into AEOI mode and ++ * unmask on all local APICs LVT0 as NMI. ++ * ++ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') ++ * is from Maciej W. Rozycki - so we do not have to EOI from ++ * the NMI handler or the timer interrupt. ++ */ ++ apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); ++ ++ on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1); ++ ++ apic_printk(APIC_VERBOSE, " done.\n"); ++} ++ ++/* ++ * This looks a bit hackish but it's about the only one way of sending ++ * a few INTA cycles to 8259As and any associated glue logic. ICR does ++ * not support the ExtINT mode, unfortunately. We need to send these ++ * cycles as some i82489DX-based boards have glue logic that keeps the ++ * 8259A interrupt line asserted until INTA. 
--macro ++ */ ++static inline void unlock_ExtINT_logic(void) ++{ ++ int apic, pin, i; ++ struct IO_APIC_route_entry entry0, entry1; ++ unsigned char save_control, save_freq_select; ++ unsigned long flags; ++ ++ pin = find_isa_irq_pin(8, mp_INT); ++ apic = find_isa_irq_apic(8, mp_INT); ++ if (pin == -1) ++ return; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); ++ *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ clear_IO_APIC_pin(apic, pin); ++ ++ memset(&entry1, 0, sizeof(entry1)); ++ ++ entry1.dest_mode = 0; /* physical delivery */ ++ entry1.mask = 0; /* unmask IRQ now */ ++ entry1.dest.physical.physical_dest = hard_smp_processor_id(); ++ entry1.delivery_mode = dest_ExtINT; ++ entry1.polarity = entry0.polarity; ++ entry1.trigger = 0; ++ entry1.vector = 0; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); ++ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ save_control = CMOS_READ(RTC_CONTROL); ++ save_freq_select = CMOS_READ(RTC_FREQ_SELECT); ++ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, ++ RTC_FREQ_SELECT); ++ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); ++ ++ i = 100; ++ while (i-- > 0) { ++ mdelay(10); ++ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) ++ i -= 10; ++ } ++ ++ CMOS_WRITE(save_control, RTC_CONTROL); ++ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); ++ clear_IO_APIC_pin(apic, pin); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); ++ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++int timer_uses_ioapic_pin_0; ++ ++/* ++ * This code may look a bit paranoid, but it's supposed to cooperate with ++ * a wide range of boards and BIOS bugs. 
Fortunately only the timer IRQ ++ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast ++ * fanatically on his truly buggy board. ++ */ ++static inline void check_timer(void) ++{ ++ int apic1, pin1, apic2, pin2; ++ int vector; ++ ++ /* ++ * get/set the timer IRQ vector: ++ */ ++ disable_8259A_irq(0); ++ vector = assign_irq_vector(0); ++ set_intr_gate(vector, interrupt[0]); ++ ++ /* ++ * Subtle, code in do_timer_interrupt() expects an AEOI ++ * mode for the 8259A whenever interrupts are routed ++ * through I/O APICs. Also IRQ0 has to be enabled in ++ * the 8259A which implies the virtual wire has to be ++ * disabled in the local APIC. ++ */ ++ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); ++ init_8259A(1); ++ timer_ack = 1; ++ if (timer_over_8254 > 0) ++ enable_8259A_irq(0); ++ ++ pin1 = find_isa_irq_pin(0, mp_INT); ++ apic1 = find_isa_irq_apic(0, mp_INT); ++ pin2 = ioapic_i8259.pin; ++ apic2 = ioapic_i8259.apic; ++ ++ if (pin1 == 0) ++ timer_uses_ioapic_pin_0 = 1; ++ ++ printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", ++ vector, apic1, pin1, apic2, pin2); ++ ++ if (pin1 != -1) { ++ /* ++ * Ok, does IRQ0 through the IOAPIC work? ++ */ ++ unmask_IO_APIC_irq(0); ++ if (timer_irq_works()) { ++ if (nmi_watchdog == NMI_IO_APIC) { ++ disable_8259A_irq(0); ++ setup_nmi(); ++ enable_8259A_irq(0); ++ } ++ if (disable_timer_pin_1 > 0) ++ clear_IO_APIC_pin(0, pin1); ++ return; ++ } ++ clear_IO_APIC_pin(apic1, pin1); ++ printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " ++ "IO-APIC\n"); ++ } ++ ++ printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); ++ if (pin2 != -1) { ++ printk("\n..... 
(found pin %d) ...", pin2); ++ /* ++ * legacy devices should be connected to IO APIC #0 ++ */ ++ setup_ExtINT_IRQ0_pin(apic2, pin2, vector); ++ if (timer_irq_works()) { ++ printk("works.\n"); ++ if (pin1 != -1) ++ replace_pin_at_irq(0, apic1, pin1, apic2, pin2); ++ else ++ add_pin_to_irq(0, apic2, pin2); ++ if (nmi_watchdog == NMI_IO_APIC) { ++ setup_nmi(); ++ } ++ return; ++ } ++ /* ++ * Cleanup, just in case ... ++ */ ++ clear_IO_APIC_pin(apic2, pin2); ++ } ++ printk(" failed.\n"); ++ ++ if (nmi_watchdog == NMI_IO_APIC) { ++ printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); ++ nmi_watchdog = 0; ++ } ++ ++ printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); ++ ++ disable_8259A_irq(0); ++ irq_desc[0].chip = &lapic_irq_type; ++ apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ ++ enable_8259A_irq(0); ++ ++ if (timer_irq_works()) { ++ printk(" works.\n"); ++ return; ++ } ++ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); ++ printk(" failed.\n"); ++ ++ printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); ++ ++ timer_ack = 0; ++ init_8259A(0); ++ make_8259A_irq(0); ++ apic_write_around(APIC_LVT0, APIC_DM_EXTINT); ++ ++ unlock_ExtINT_logic(); ++ ++ if (timer_irq_works()) { ++ printk(" works.\n"); ++ return; ++ } ++ printk(" failed :(.\n"); ++ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " ++ "report. Then try booting with the 'noapic' option"); ++} ++#else ++int timer_uses_ioapic_pin_0 = 0; ++#define check_timer() ((void)0) ++#endif ++ ++/* ++ * ++ * IRQ's that are handled by the PIC in the MPS IOAPIC case. ++ * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. ++ * Linux doesn't really care, as it's not actually used ++ * for any interrupt handling anyway. 
++ */ ++#define PIC_IRQS (1 << PIC_CASCADE_IR) ++ ++void __init setup_IO_APIC(void) ++{ ++ enable_IO_APIC(); ++ ++ if (acpi_ioapic) ++ io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ ++ else ++ io_apic_irqs = ~PIC_IRQS; ++ ++ printk("ENABLING IO-APIC IRQs\n"); ++ ++ /* ++ * Set up IO-APIC IRQ routing. ++ */ ++ if (!acpi_ioapic) ++ setup_ioapic_ids_from_mpc(); ++#ifndef CONFIG_XEN ++ sync_Arb_IDs(); ++#endif ++ setup_IO_APIC_irqs(); ++ init_IO_APIC_traps(); ++ check_timer(); ++ if (!acpi_ioapic) ++ print_IO_APIC(); ++} ++ ++static int __init setup_disable_8254_timer(char *s) ++{ ++ timer_over_8254 = -1; ++ return 1; ++} ++static int __init setup_enable_8254_timer(char *s) ++{ ++ timer_over_8254 = 2; ++ return 1; ++} ++ ++__setup("disable_8254_timer", setup_disable_8254_timer); ++__setup("enable_8254_timer", setup_enable_8254_timer); ++ ++/* ++ * Called after all the initialization is done. If we didnt find any ++ * APIC bugs then we can allow the modify fast path ++ */ ++ ++static int __init io_apic_bug_finalize(void) ++{ ++ if(sis_apic_bug == -1) ++ sis_apic_bug = 0; ++ if (is_initial_xendomain()) { ++ struct xen_platform_op op = { .cmd = XENPF_platform_quirk }; ++ op.u.platform_quirk.quirk_id = sis_apic_bug ? 
++ QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL; ++ VOID(HYPERVISOR_platform_op(&op)); ++ } ++ return 0; ++} ++ ++late_initcall(io_apic_bug_finalize); ++ ++struct sysfs_ioapic_data { ++ struct sys_device dev; ++ struct IO_APIC_route_entry entry[0]; ++}; ++static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; ++ ++static int ioapic_suspend(struct sys_device *dev, pm_message_t state) ++{ ++ struct IO_APIC_route_entry *entry; ++ struct sysfs_ioapic_data *data; ++ unsigned long flags; ++ int i; ++ ++ data = container_of(dev, struct sysfs_ioapic_data, dev); ++ entry = data->entry; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { ++ *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); ++ *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); ++ } ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return 0; ++} ++ ++static int ioapic_resume(struct sys_device *dev) ++{ ++ struct IO_APIC_route_entry *entry; ++ struct sysfs_ioapic_data *data; ++ unsigned long flags; ++ union IO_APIC_reg_00 reg_00; ++ int i; ++ ++ data = container_of(dev, struct sysfs_ioapic_data, dev); ++ entry = data->entry; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(dev->id, 0); ++ if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { ++ reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; ++ io_apic_write(dev->id, 0, reg_00.raw); ++ } ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { ++ io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); ++ io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); ++ } ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return 0; ++} ++ ++static struct sysdev_class ioapic_sysdev_class = { ++ set_kset_name("ioapic"), ++#ifndef CONFIG_XEN ++ .suspend = ioapic_suspend, ++ .resume = ioapic_resume, ++#endif ++}; ++ ++static int __init ioapic_init_sysfs(void) ++{ ++ struct sys_device * dev; ++ int i, size, error = 0; ++ ++ error = 
sysdev_class_register(&ioapic_sysdev_class); ++ if (error) ++ return error; ++ ++ for (i = 0; i < nr_ioapics; i++ ) { ++ size = sizeof(struct sys_device) + nr_ioapic_registers[i] ++ * sizeof(struct IO_APIC_route_entry); ++ mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); ++ if (!mp_ioapic_data[i]) { ++ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); ++ continue; ++ } ++ memset(mp_ioapic_data[i], 0, size); ++ dev = &mp_ioapic_data[i]->dev; ++ dev->id = i; ++ dev->cls = &ioapic_sysdev_class; ++ error = sysdev_register(dev); ++ if (error) { ++ kfree(mp_ioapic_data[i]); ++ mp_ioapic_data[i] = NULL; ++ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); ++ continue; ++ } ++ } ++ ++ return 0; ++} ++ ++device_initcall(ioapic_init_sysfs); ++ ++/* -------------------------------------------------------------------------- ++ ACPI-based IOAPIC Configuration ++ -------------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_ACPI ++ ++int __init io_apic_get_unique_id (int ioapic, int apic_id) ++{ ++#ifndef CONFIG_XEN ++ union IO_APIC_reg_00 reg_00; ++ static physid_mask_t apic_id_map = PHYSID_MASK_NONE; ++ physid_mask_t tmp; ++ unsigned long flags; ++ int i = 0; ++ ++ /* ++ * The P4 platform supports up to 256 APIC IDs on two separate APIC ++ * buses (one for LAPICs, one for IOAPICs), where predecessors only ++ * supports up to 16 on one shared APIC bus. ++ * ++ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full ++ * advantage of new APIC bus architecture. 
++ */ ++ ++ if (physids_empty(apic_id_map)) ++ apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ if (apic_id >= get_physical_broadcast()) { ++ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " ++ "%d\n", ioapic, apic_id, reg_00.bits.ID); ++ apic_id = reg_00.bits.ID; ++ } ++ ++ /* ++ * Every APIC in a system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. ++ */ ++ if (check_apicid_used(apic_id_map, apic_id)) { ++ ++ for (i = 0; i < get_physical_broadcast(); i++) { ++ if (!check_apicid_used(apic_id_map, i)) ++ break; ++ } ++ ++ if (i == get_physical_broadcast()) ++ panic("Max apic_id exceeded!\n"); ++ ++ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " ++ "trying %d\n", ioapic, apic_id, i); ++ ++ apic_id = i; ++ } ++ ++ tmp = apicid_to_cpu_present(apic_id); ++ physids_or(apic_id_map, apic_id_map, tmp); ++ ++ if (reg_00.bits.ID != apic_id) { ++ reg_00.bits.ID = apic_id; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic, 0, reg_00.raw); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* Sanity check */ ++ if (reg_00.bits.ID != apic_id) { ++ printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); ++ return -1; ++ } ++ } ++ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); ++#endif /* !CONFIG_XEN */ ++ ++ return apic_id; ++} ++ ++ ++int __init io_apic_get_version (int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return reg_01.bits.version; ++} ++ ++ ++int __init io_apic_get_redir_entries (int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ 
spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return reg_01.bits.entries; ++} ++ ++ ++int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) ++{ ++ struct IO_APIC_route_entry entry; ++ unsigned long flags; ++ ++ if (!IO_APIC_IRQ(irq)) { ++ printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ++ ioapic); ++ return -EINVAL; ++ } ++ ++ /* ++ * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. ++ * Note that we mask (disable) IRQs now -- these get enabled when the ++ * corresponding device driver registers for this IRQ. ++ */ ++ ++ memset(&entry,0,sizeof(entry)); ++ ++ entry.delivery_mode = INT_DELIVERY_MODE; ++ entry.dest_mode = INT_DEST_MODE; ++ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); ++ entry.trigger = edge_level; ++ entry.polarity = active_high_low; ++ entry.mask = 1; ++ ++ /* ++ * IRQs < 16 are already in the irq_2_pin[] map ++ */ ++ if (irq >= 16) ++ add_pin_to_irq(irq, ioapic, pin); ++ ++ entry.vector = assign_irq_vector(irq); ++ ++ apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " ++ "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, ++ mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, ++ edge_level, active_high_low); ++ ++ ioapic_register_intr(irq, entry.vector, edge_level); ++ ++ if (!ioapic && (irq < 16)) ++ disable_8259A_irq(irq); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); ++ io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); ++ set_native_irq_info(use_pci_vector() ? 
entry.vector : irq, TARGET_CPUS); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return 0; ++} ++ ++#endif /* CONFIG_ACPI */ +Index: head-2008-11-25/arch/x86/kernel/ioport_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/ioport_32-xen.c 2008-01-28 12:24:19.000000000 +0100 +@@ -0,0 +1,123 @@ ++/* ++ * linux/arch/i386/kernel/ioport.c ++ * ++ * This contains the io-permission bitmap code - written by obz, with changes ++ * by Linus. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ ++static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) ++{ ++ unsigned long mask; ++ unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG); ++ unsigned int low_index = base & (BITS_PER_LONG-1); ++ int length = low_index + extent; ++ ++ if (low_index != 0) { ++ mask = (~0UL << low_index); ++ if (length < BITS_PER_LONG) ++ mask &= ~(~0UL << length); ++ if (new_value) ++ *bitmap_base++ |= mask; ++ else ++ *bitmap_base++ &= ~mask; ++ length -= BITS_PER_LONG; ++ } ++ ++ mask = (new_value ? ~0UL : 0UL); ++ while (length >= BITS_PER_LONG) { ++ *bitmap_base++ = mask; ++ length -= BITS_PER_LONG; ++ } ++ ++ if (length > 0) { ++ mask = ~(~0UL << length); ++ if (new_value) ++ *bitmap_base++ |= mask; ++ else ++ *bitmap_base++ &= ~mask; ++ } ++} ++ ++ ++/* ++ * this changes the io permissions bitmap in the current task. 
++ */ ++asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) ++{ ++ struct thread_struct * t = ¤t->thread; ++ unsigned long *bitmap; ++ struct physdev_set_iobitmap set_iobitmap; ++ ++ if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) ++ return -EINVAL; ++ if (turn_on && !capable(CAP_SYS_RAWIO)) ++ return -EPERM; ++ ++ /* ++ * If it's the first ioperm() call in this thread's lifetime, set the ++ * IO bitmap up. ioperm() is much less timing critical than clone(), ++ * this is why we delay this operation until now: ++ */ ++ if (!t->io_bitmap_ptr) { ++ bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); ++ if (!bitmap) ++ return -ENOMEM; ++ ++ memset(bitmap, 0xff, IO_BITMAP_BYTES); ++ t->io_bitmap_ptr = bitmap; ++ set_thread_flag(TIF_IO_BITMAP); ++ ++ set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap); ++ set_iobitmap.nr_ports = IO_BITMAP_BITS; ++ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, ++ &set_iobitmap)); ++ } ++ ++ set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); ++ ++ return 0; ++} ++ ++/* ++ * sys_iopl has to be used when you want to access the IO ports ++ * beyond the 0x3ff range: to get the full 65536 ports bitmapped ++ * you'd need 8kB of bitmaps/process, which is a bit excessive. ++ * ++ * Here we just change the eflags value on the stack: we allow ++ * only the super-user to do it. This depends on the stack-layout ++ * on system-call entry - see also fork() and the signal handling ++ * code. ++ */ ++ ++asmlinkage long sys_iopl(unsigned long unused) ++{ ++ volatile struct pt_regs * regs = (struct pt_regs *) &unused; ++ unsigned int level = regs->ebx; ++ struct thread_struct *t = ¤t->thread; ++ unsigned int old = (t->iopl >> 12) & 3; ++ ++ if (level > 3) ++ return -EINVAL; ++ /* Trying to gain more privileges? 
*/ ++ if (level > old) { ++ if (!capable(CAP_SYS_RAWIO)) ++ return -EPERM; ++ } ++ t->iopl = level << 12; ++ set_iopl_mask(t->iopl); ++ return 0; ++} +Index: head-2008-11-25/arch/x86/kernel/irq_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/irq_32-xen.c 2008-10-29 09:55:56.000000000 +0100 +@@ -0,0 +1,324 @@ ++/* ++ * linux/arch/i386/kernel/irq.c ++ * ++ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar ++ * ++ * This file contains the lowest level x86-specific interrupt ++ * entry, irq-stacks and irq statistics code. All the remaining ++ * irq logic is done by the generic kernel/irq/ code and ++ * by the x86-specific irq controller code. (e.g. i8259.c and ++ * io_apic.c.) ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; ++EXPORT_PER_CPU_SYMBOL(irq_stat); ++ ++#ifndef CONFIG_X86_LOCAL_APIC ++/* ++ * 'what should we do if we get a hw irq event on an illegal vector'. ++ * each architecture has to answer this themselves. ++ */ ++void ack_bad_irq(unsigned int irq) ++{ ++ printk("unexpected IRQ trap at vector %02x\n", irq); ++} ++#endif ++ ++#ifdef CONFIG_4KSTACKS ++/* ++ * per-CPU IRQ handling contexts (thread information and stack) ++ */ ++union irq_ctx { ++ struct thread_info tinfo; ++ u32 stack[THREAD_SIZE/sizeof(u32)]; ++}; ++ ++static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; ++static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; ++#endif ++ ++/* ++ * do_IRQ handles all normal device IRQ's (the special ++ * SMP cross-CPU interrupts have their own specific ++ * handlers). 
++ */ ++fastcall unsigned int do_IRQ(struct pt_regs *regs) ++{ ++ /* high bit used in ret_from_ code */ ++ int irq = ~regs->orig_eax; ++#ifdef CONFIG_4KSTACKS ++ union irq_ctx *curctx, *irqctx; ++ u32 *isp; ++#endif ++ ++ if (unlikely((unsigned)irq >= NR_IRQS)) { ++ printk(KERN_EMERG "%s: cannot handle IRQ %d\n", ++ __FUNCTION__, irq); ++ BUG(); ++ } ++ ++ /*irq_enter();*/ ++#ifdef CONFIG_DEBUG_STACKOVERFLOW ++ /* Debugging check for stack overflow: is there less than 1KB free? */ ++ { ++ long esp; ++ ++ __asm__ __volatile__("andl %%esp,%0" : ++ "=r" (esp) : "0" (THREAD_SIZE - 1)); ++ if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { ++ printk("do_IRQ: stack overflow: %ld\n", ++ esp - sizeof(struct thread_info)); ++ dump_stack(); ++ } ++ } ++#endif ++ ++#ifdef CONFIG_4KSTACKS ++ ++ curctx = (union irq_ctx *) current_thread_info(); ++ irqctx = hardirq_ctx[smp_processor_id()]; ++ ++ /* ++ * this is where we switch to the IRQ stack. However, if we are ++ * already using the IRQ stack (because we interrupted a hardirq ++ * handler) we can't do that and just have to keep using the ++ * current stack (which is the irq stack already after all) ++ */ ++ if (curctx != irqctx) { ++ int arg1, arg2, ebx; ++ ++ /* build the stack frame on the IRQ stack */ ++ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); ++ irqctx->tinfo.task = curctx->tinfo.task; ++ irqctx->tinfo.previous_esp = current_stack_pointer; ++ ++ /* ++ * Copy the softirq bits in preempt_count so that the ++ * softirq checks work in the hardirq context. 
++ */ ++ irqctx->tinfo.preempt_count = ++ (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | ++ (curctx->tinfo.preempt_count & SOFTIRQ_MASK); ++ ++ asm volatile( ++ " xchgl %%ebx,%%esp \n" ++ " call __do_IRQ \n" ++ " movl %%ebx,%%esp \n" ++ : "=a" (arg1), "=d" (arg2), "=b" (ebx) ++ : "0" (irq), "1" (regs), "2" (isp) ++ : "memory", "cc", "ecx" ++ ); ++ } else ++#endif ++ __do_IRQ(irq, regs); ++ ++ /*irq_exit();*/ ++ ++ return 1; ++} ++ ++#ifdef CONFIG_4KSTACKS ++ ++/* ++ * These should really be __section__(".bss.page_aligned") as well, but ++ * gcc's 3.0 and earlier don't handle that correctly. ++ */ ++static char softirq_stack[NR_CPUS * THREAD_SIZE] ++ __attribute__((__aligned__(THREAD_SIZE))); ++ ++static char hardirq_stack[NR_CPUS * THREAD_SIZE] ++ __attribute__((__aligned__(THREAD_SIZE))); ++ ++/* ++ * allocate per-cpu stacks for hardirq and for softirq processing ++ */ ++void irq_ctx_init(int cpu) ++{ ++ union irq_ctx *irqctx; ++ ++ if (hardirq_ctx[cpu]) ++ return; ++ ++ irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; ++ irqctx->tinfo.task = NULL; ++ irqctx->tinfo.exec_domain = NULL; ++ irqctx->tinfo.cpu = cpu; ++ irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; ++ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); ++ ++ hardirq_ctx[cpu] = irqctx; ++ ++ irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; ++ irqctx->tinfo.task = NULL; ++ irqctx->tinfo.exec_domain = NULL; ++ irqctx->tinfo.cpu = cpu; ++ irqctx->tinfo.preempt_count = 0; ++ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); ++ ++ softirq_ctx[cpu] = irqctx; ++ ++ printk("CPU %u irqstacks, hard=%p soft=%p\n", ++ cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); ++} ++ ++void irq_ctx_exit(int cpu) ++{ ++ hardirq_ctx[cpu] = NULL; ++} ++ ++extern asmlinkage void __do_softirq(void); ++ ++asmlinkage void do_softirq(void) ++{ ++ unsigned long flags; ++ struct thread_info *curctx; ++ union irq_ctx *irqctx; ++ u32 *isp; ++ ++ if (in_interrupt()) ++ return; ++ ++ local_irq_save(flags); ++ ++ if (local_softirq_pending()) { 
++ curctx = current_thread_info(); ++ irqctx = softirq_ctx[smp_processor_id()]; ++ irqctx->tinfo.task = curctx->task; ++ irqctx->tinfo.previous_esp = current_stack_pointer; ++ ++ /* build the stack frame on the softirq stack */ ++ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); ++ ++ asm volatile( ++ " xchgl %%ebx,%%esp \n" ++ " call __do_softirq \n" ++ " movl %%ebx,%%esp \n" ++ : "=b"(isp) ++ : "0"(isp) ++ : "memory", "cc", "edx", "ecx", "eax" ++ ); ++ /* ++ * Shouldnt happen, we returned above if in_interrupt(): ++ */ ++ WARN_ON_ONCE(softirq_count()); ++ } ++ ++ local_irq_restore(flags); ++} ++ ++EXPORT_SYMBOL(do_softirq); ++#endif ++ ++/* ++ * Interrupt statistics: ++ */ ++ ++atomic_t irq_err_count; ++ ++/* ++ * /proc/interrupts printing: ++ */ ++ ++int show_interrupts(struct seq_file *p, void *v) ++{ ++ int i = *(loff_t *) v, j; ++ struct irqaction * action; ++ unsigned long flags; ++ ++ if (i == 0) { ++ seq_printf(p, " "); ++ for_each_online_cpu(j) ++ seq_printf(p, "CPU%-8d",j); ++ seq_putc(p, '\n'); ++ } ++ ++ if (i < NR_IRQS) { ++ spin_lock_irqsave(&irq_desc[i].lock, flags); ++ action = irq_desc[i].action; ++ if (!action) ++ goto skip; ++ seq_printf(p, "%3d: ",i); ++#ifndef CONFIG_SMP ++ seq_printf(p, "%10u ", kstat_irqs(i)); ++#else ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++#endif ++ seq_printf(p, " %14s", irq_desc[i].chip->typename); ++ seq_printf(p, " %s", action->name); ++ ++ for (action=action->next; action; action = action->next) ++ seq_printf(p, ", %s", action->name); ++ ++ seq_putc(p, '\n'); ++skip: ++ spin_unlock_irqrestore(&irq_desc[i].lock, flags); ++ } else if (i == NR_IRQS) { ++ seq_printf(p, "NMI: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", nmi_count(j)); ++ seq_putc(p, '\n'); ++#ifdef CONFIG_X86_LOCAL_APIC ++ seq_printf(p, "LOC: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", ++ per_cpu(irq_stat,j).apic_timer_irqs); ++ seq_putc(p, '\n'); ++#endif ++ seq_printf(p, "ERR: %10u\n", 
atomic_read(&irq_err_count)); ++#if defined(CONFIG_X86_IO_APIC) ++ seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); ++#endif ++ } ++ return 0; ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ ++void fixup_irqs(cpumask_t map) ++{ ++ unsigned int irq; ++ static int warned; ++ ++ for (irq = 0; irq < NR_IRQS; irq++) { ++ cpumask_t mask; ++ if (irq == 2) ++ continue; ++ ++ cpus_and(mask, irq_desc[irq].affinity, map); ++ if (any_online_cpu(mask) == NR_CPUS) { ++ /*printk("Breaking affinity for irq %i\n", irq);*/ ++ mask = map; ++ } ++ if (irq_desc[irq].chip->set_affinity) ++ irq_desc[irq].chip->set_affinity(irq, mask); ++ else if (irq_desc[irq].action && !(warned++)) ++ printk("Cannot set affinity for irq %i\n", irq); ++ } ++ ++#if 0 ++ barrier(); ++ /* Ingo Molnar says: "after the IO-APIC masks have been redirected ++ [note the nop - the interrupt-enable boundary on x86 is two ++ instructions from sti] - to flush out pending hardirqs and ++ IPIs. After this point nothing is supposed to reach this CPU." */ ++ __asm__ __volatile__("sti; nop; cli"); ++ barrier(); ++#else ++ /* That doesn't seem sufficient. Give it 1ms. 
*/ ++ local_irq_enable(); ++ mdelay(1); ++ local_irq_disable(); ++#endif ++} ++#endif ++ +Index: head-2008-11-25/arch/x86/kernel/ldt_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/ldt_32-xen.c 2007-06-12 13:12:48.000000000 +0200 +@@ -0,0 +1,270 @@ ++/* ++ * linux/kernel/ldt.c ++ * ++ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds ++ * Copyright (C) 1999 Ingo Molnar ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ ++static void flush_ldt(void *null) ++{ ++ if (current->active_mm) ++ load_LDT(¤t->active_mm->context); ++} ++#endif ++ ++static int alloc_ldt(mm_context_t *pc, int mincount, int reload) ++{ ++ void *oldldt; ++ void *newldt; ++ int oldsize; ++ ++ if (mincount <= pc->size) ++ return 0; ++ oldsize = pc->size; ++ mincount = (mincount+511)&(~511); ++ if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) ++ newldt = vmalloc(mincount*LDT_ENTRY_SIZE); ++ else ++ newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); ++ ++ if (!newldt) ++ return -ENOMEM; ++ ++ if (oldsize) ++ memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); ++ oldldt = pc->ldt; ++ memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); ++ pc->ldt = newldt; ++ wmb(); ++ pc->size = mincount; ++ wmb(); ++ ++ if (reload) { ++#ifdef CONFIG_SMP ++ cpumask_t mask; ++ preempt_disable(); ++#endif ++ make_pages_readonly( ++ pc->ldt, ++ (pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE, ++ XENFEAT_writable_descriptor_tables); ++ load_LDT(pc); ++#ifdef CONFIG_SMP ++ mask = cpumask_of_cpu(smp_processor_id()); ++ if (!cpus_equal(current->mm->cpu_vm_mask, mask)) ++ smp_call_function(flush_ldt, NULL, 1, 1); ++ preempt_enable(); ++#endif ++ } ++ if (oldsize) { ++ make_pages_writable( ++ oldldt, ++ (oldsize * 
LDT_ENTRY_SIZE) / PAGE_SIZE, ++ XENFEAT_writable_descriptor_tables); ++ if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) ++ vfree(oldldt); ++ else ++ kfree(oldldt); ++ } ++ return 0; ++} ++ ++static inline int copy_ldt(mm_context_t *new, mm_context_t *old) ++{ ++ int err = alloc_ldt(new, old->size, 0); ++ if (err < 0) ++ return err; ++ memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); ++ make_pages_readonly( ++ new->ldt, ++ (new->size * LDT_ENTRY_SIZE) / PAGE_SIZE, ++ XENFEAT_writable_descriptor_tables); ++ return 0; ++} ++ ++/* ++ * we do not have to muck with descriptors here, that is ++ * done in switch_mm() as needed. ++ */ ++int init_new_context(struct task_struct *tsk, struct mm_struct *mm) ++{ ++ struct mm_struct * old_mm; ++ int retval = 0; ++ ++ init_MUTEX(&mm->context.sem); ++ mm->context.size = 0; ++ mm->context.has_foreign_mappings = 0; ++ old_mm = current->mm; ++ if (old_mm && old_mm->context.size > 0) { ++ down(&old_mm->context.sem); ++ retval = copy_ldt(&mm->context, &old_mm->context); ++ up(&old_mm->context.sem); ++ } ++ return retval; ++} ++ ++/* ++ * No need to lock the MM as we are the last user ++ */ ++void destroy_context(struct mm_struct *mm) ++{ ++ if (mm->context.size) { ++ if (mm == current->active_mm) ++ clear_LDT(); ++ make_pages_writable( ++ mm->context.ldt, ++ (mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE, ++ XENFEAT_writable_descriptor_tables); ++ if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) ++ vfree(mm->context.ldt); ++ else ++ kfree(mm->context.ldt); ++ mm->context.size = 0; ++ } ++} ++ ++static int read_ldt(void __user * ptr, unsigned long bytecount) ++{ ++ int err; ++ unsigned long size; ++ struct mm_struct * mm = current->mm; ++ ++ if (!mm->context.size) ++ return 0; ++ if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) ++ bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; ++ ++ down(&mm->context.sem); ++ size = mm->context.size*LDT_ENTRY_SIZE; ++ if (size > bytecount) ++ size = bytecount; ++ ++ err = 0; ++ if (copy_to_user(ptr, mm->context.ldt, 
size)) ++ err = -EFAULT; ++ up(&mm->context.sem); ++ if (err < 0) ++ goto error_return; ++ if (size != bytecount) { ++ /* zero-fill the rest */ ++ if (clear_user(ptr+size, bytecount-size) != 0) { ++ err = -EFAULT; ++ goto error_return; ++ } ++ } ++ return bytecount; ++error_return: ++ return err; ++} ++ ++static int read_default_ldt(void __user * ptr, unsigned long bytecount) ++{ ++ int err; ++ unsigned long size; ++ void *address; ++ ++ err = 0; ++ address = &default_ldt[0]; ++ size = 5*sizeof(struct desc_struct); ++ if (size > bytecount) ++ size = bytecount; ++ ++ err = size; ++ if (copy_to_user(ptr, address, size)) ++ err = -EFAULT; ++ ++ return err; ++} ++ ++static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) ++{ ++ struct mm_struct * mm = current->mm; ++ __u32 entry_1, entry_2; ++ int error; ++ struct user_desc ldt_info; ++ ++ error = -EINVAL; ++ if (bytecount != sizeof(ldt_info)) ++ goto out; ++ error = -EFAULT; ++ if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) ++ goto out; ++ ++ error = -EINVAL; ++ if (ldt_info.entry_number >= LDT_ENTRIES) ++ goto out; ++ if (ldt_info.contents == 3) { ++ if (oldmode) ++ goto out; ++ if (ldt_info.seg_not_present == 0) ++ goto out; ++ } ++ ++ down(&mm->context.sem); ++ if (ldt_info.entry_number >= mm->context.size) { ++ error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); ++ if (error < 0) ++ goto out_unlock; ++ } ++ ++ /* Allow LDTs to be cleared by the user. */ ++ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { ++ if (oldmode || LDT_empty(&ldt_info)) { ++ entry_1 = 0; ++ entry_2 = 0; ++ goto install; ++ } ++ } ++ ++ entry_1 = LDT_entry_a(&ldt_info); ++ entry_2 = LDT_entry_b(&ldt_info); ++ if (oldmode) ++ entry_2 &= ~(1 << 20); ++ ++ /* Install the new entry ... 
*/ ++install: ++ error = write_ldt_entry(mm->context.ldt, ldt_info.entry_number, ++ entry_1, entry_2); ++ ++out_unlock: ++ up(&mm->context.sem); ++out: ++ return error; ++} ++ ++asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) ++{ ++ int ret = -ENOSYS; ++ ++ switch (func) { ++ case 0: ++ ret = read_ldt(ptr, bytecount); ++ break; ++ case 1: ++ ret = write_ldt(ptr, bytecount, 1); ++ break; ++ case 2: ++ ret = read_default_ldt(ptr, bytecount); ++ break; ++ case 0x11: ++ ret = write_ldt(ptr, bytecount, 0); ++ break; ++ } ++ return ret; ++} +Index: head-2008-11-25/arch/x86/kernel/microcode-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/microcode-xen.c 2007-06-12 13:12:48.000000000 +0200 +@@ -0,0 +1,144 @@ ++/* ++ * Intel CPU Microcode Update Driver for Linux ++ * ++ * Copyright (C) 2000-2004 Tigran Aivazian ++ * ++ * This driver allows to upgrade microcode on Intel processors ++ * belonging to IA-32 family - PentiumPro, Pentium II, ++ * Pentium III, Xeon, Pentium 4, etc. ++ * ++ * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, ++ * Order Number 245472 or free download from: ++ * ++ * http://developer.intel.com/design/pentium4/manuals/245472.htm ++ * ++ * For more information, go to http://www.urbanmyth.org/microcode ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. 
++ */ ++ ++//#define DEBUG /* pr_debug */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); ++MODULE_AUTHOR("Tigran Aivazian "); ++MODULE_LICENSE("GPL"); ++ ++static int verbose; ++module_param(verbose, int, 0644); ++ ++#define MICROCODE_VERSION "1.14a-xen" ++ ++#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ ++#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ ++#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ ++ ++/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ ++static DEFINE_MUTEX(microcode_mutex); ++ ++static int microcode_open (struct inode *unused1, struct file *unused2) ++{ ++ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; ++} ++ ++ ++static int do_microcode_update (const void __user *ubuf, size_t len) ++{ ++ int err; ++ void *kbuf; ++ ++ kbuf = vmalloc(len); ++ if (!kbuf) ++ return -ENOMEM; ++ ++ if (copy_from_user(kbuf, ubuf, len) == 0) { ++ struct xen_platform_op op; ++ ++ op.cmd = XENPF_microcode_update; ++ set_xen_guest_handle(op.u.microcode.data, kbuf); ++ op.u.microcode.length = len; ++ err = HYPERVISOR_platform_op(&op); ++ } else ++ err = -EFAULT; ++ ++ vfree(kbuf); ++ ++ return err; ++} ++ ++static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) ++{ ++ ssize_t ret; ++ ++ if (len < MC_HEADER_SIZE) { ++ printk(KERN_ERR "microcode: not enough data\n"); ++ return -EINVAL; ++ } ++ ++ mutex_lock(µcode_mutex); ++ ++ ret = do_microcode_update(buf, len); ++ if (!ret) ++ ret = (ssize_t)len; ++ ++ mutex_unlock(µcode_mutex); ++ ++ return ret; ++} ++ ++static struct file_operations microcode_fops = { ++ .owner = THIS_MODULE, ++ .write = microcode_write, ++ .open = microcode_open, ++}; ++ ++static struct miscdevice microcode_dev = { ++ .minor 
= MICROCODE_MINOR, ++ .name = "microcode", ++ .fops = µcode_fops, ++}; ++ ++static int __init microcode_init (void) ++{ ++ int error; ++ ++ error = misc_register(µcode_dev); ++ if (error) { ++ printk(KERN_ERR ++ "microcode: can't misc_register on minor=%d\n", ++ MICROCODE_MINOR); ++ return error; ++ } ++ ++ printk(KERN_INFO ++ "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); ++ return 0; ++} ++ ++static void __exit microcode_exit (void) ++{ ++ misc_deregister(µcode_dev); ++} ++ ++module_init(microcode_init) ++module_exit(microcode_exit) ++MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +Index: head-2008-11-25/arch/x86/kernel/mpparse_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/mpparse_32-xen.c 2007-06-12 13:12:48.000000000 +0200 +@@ -0,0 +1,1185 @@ ++/* ++ * Intel Multiprocessor Specification 1.1 and 1.4 ++ * compliant MP-table parsing routines. ++ * ++ * (c) 1995 Alan Cox, Building #3 ++ * (c) 1998, 1999, 2000 Ingo Molnar ++ * ++ * Fixes ++ * Erich Boleyn : MP v1.4 and additional changes. ++ * Alan Cox : Added EBDA scanning ++ * Ingo Molnar : various cleanups and rewrites ++ * Maciej W. Rozycki: Bits for default MP configurations ++ * Paul Diefenbaugh: Added full ACPI support ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++/* Have we found an MP table */ ++int smp_found_config; ++unsigned int __initdata maxcpus = NR_CPUS; ++ ++/* ++ * Various Linux-internal data structures created from the ++ * MP-table. ++ */ ++int apic_version [MAX_APICS]; ++int mp_bus_id_to_type [MAX_MP_BUSSES]; ++int mp_bus_id_to_node [MAX_MP_BUSSES]; ++int mp_bus_id_to_local [MAX_MP_BUSSES]; ++int quad_local_to_mp_bus_id [NR_CPUS/4][4]; ++int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... 
MAX_MP_BUSSES-1] = -1 }; ++static int mp_current_pci_id; ++ ++/* I/O APIC entries */ ++struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; ++ ++/* # of MP IRQ source entries */ ++struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; ++ ++/* MP IRQ source entries */ ++int mp_irq_entries; ++ ++int nr_ioapics; ++ ++int pic_mode; ++unsigned long mp_lapic_addr; ++ ++unsigned int def_to_bigsmp = 0; ++ ++/* Processor that is doing the boot up */ ++unsigned int boot_cpu_physical_apicid = -1U; ++/* Internal processor count */ ++static unsigned int __devinitdata num_processors; ++ ++/* Bitmask of physically existing CPUs */ ++physid_mask_t phys_cpu_present_map; ++ ++u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; ++ ++/* ++ * Intel MP BIOS table parsing routines: ++ */ ++ ++ ++/* ++ * Checksum an MP configuration block. ++ */ ++ ++static int __init mpf_checksum(unsigned char *mp, int len) ++{ ++ int sum = 0; ++ ++ while (len--) ++ sum += *mp++; ++ ++ return sum & 0xFF; ++} ++ ++/* ++ * Have to match translation table entries to main table entries by counter ++ * hence the mpc_record variable .... can't see a less disgusting way of ++ * doing this .... 
++ */ ++ ++static int mpc_record; ++static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; ++ ++#ifndef CONFIG_XEN ++static void __devinit MP_processor_info (struct mpc_config_processor *m) ++{ ++ int ver, apicid; ++ physid_mask_t phys_cpu; ++ ++ if (!(m->mpc_cpuflag & CPU_ENABLED)) ++ return; ++ ++ apicid = mpc_apic_id(m, translation_table[mpc_record]); ++ ++ if (m->mpc_featureflag&(1<<0)) ++ Dprintk(" Floating point unit present.\n"); ++ if (m->mpc_featureflag&(1<<7)) ++ Dprintk(" Machine Exception supported.\n"); ++ if (m->mpc_featureflag&(1<<8)) ++ Dprintk(" 64 bit compare & exchange supported.\n"); ++ if (m->mpc_featureflag&(1<<9)) ++ Dprintk(" Internal APIC present.\n"); ++ if (m->mpc_featureflag&(1<<11)) ++ Dprintk(" SEP present.\n"); ++ if (m->mpc_featureflag&(1<<12)) ++ Dprintk(" MTRR present.\n"); ++ if (m->mpc_featureflag&(1<<13)) ++ Dprintk(" PGE present.\n"); ++ if (m->mpc_featureflag&(1<<14)) ++ Dprintk(" MCA present.\n"); ++ if (m->mpc_featureflag&(1<<15)) ++ Dprintk(" CMOV present.\n"); ++ if (m->mpc_featureflag&(1<<16)) ++ Dprintk(" PAT present.\n"); ++ if (m->mpc_featureflag&(1<<17)) ++ Dprintk(" PSE present.\n"); ++ if (m->mpc_featureflag&(1<<18)) ++ Dprintk(" PSN present.\n"); ++ if (m->mpc_featureflag&(1<<19)) ++ Dprintk(" Cache Line Flush Instruction present.\n"); ++ /* 20 Reserved */ ++ if (m->mpc_featureflag&(1<<21)) ++ Dprintk(" Debug Trace and EMON Store present.\n"); ++ if (m->mpc_featureflag&(1<<22)) ++ Dprintk(" ACPI Thermal Throttle Registers present.\n"); ++ if (m->mpc_featureflag&(1<<23)) ++ Dprintk(" MMX present.\n"); ++ if (m->mpc_featureflag&(1<<24)) ++ Dprintk(" FXSR present.\n"); ++ if (m->mpc_featureflag&(1<<25)) ++ Dprintk(" XMM present.\n"); ++ if (m->mpc_featureflag&(1<<26)) ++ Dprintk(" Willamette New Instructions present.\n"); ++ if (m->mpc_featureflag&(1<<27)) ++ Dprintk(" Self Snoop present.\n"); ++ if (m->mpc_featureflag&(1<<28)) ++ Dprintk(" HT present.\n"); ++ if 
(m->mpc_featureflag&(1<<29)) ++ Dprintk(" Thermal Monitor present.\n"); ++ /* 30, 31 Reserved */ ++ ++ ++ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { ++ Dprintk(" Bootup CPU\n"); ++ boot_cpu_physical_apicid = m->mpc_apicid; ++ } ++ ++ ver = m->mpc_apicver; ++ ++ /* ++ * Validate version ++ */ ++ if (ver == 0x0) { ++ printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " ++ "fixing up to 0x10. (tell your hw vendor)\n", ++ m->mpc_apicid); ++ ver = 0x10; ++ } ++ apic_version[m->mpc_apicid] = ver; ++ ++ phys_cpu = apicid_to_cpu_present(apicid); ++ physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); ++ ++ if (num_processors >= NR_CPUS) { ++ printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." ++ " Processor ignored.\n", NR_CPUS); ++ return; ++ } ++ ++ if (num_processors >= maxcpus) { ++ printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." ++ " Processor ignored.\n", maxcpus); ++ return; ++ } ++ ++ cpu_set(num_processors, cpu_possible_map); ++ num_processors++; ++ ++ /* ++ * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y ++ * but we need to work other dependencies like SMP_SUSPEND etc ++ * before this can be done without some confusion. 
++ * if (CPU_HOTPLUG_ENABLED || num_processors > 8) ++ * - Ashok Raj ++ */ ++ if (num_processors > 8) { ++ switch (boot_cpu_data.x86_vendor) { ++ case X86_VENDOR_INTEL: ++ if (!APIC_XAPIC(ver)) { ++ def_to_bigsmp = 0; ++ break; ++ } ++ /* If P4 and above fall through */ ++ case X86_VENDOR_AMD: ++ def_to_bigsmp = 1; ++ } ++ } ++ bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; ++} ++#else ++void __init MP_processor_info (struct mpc_config_processor *m) ++{ ++ num_processors++; ++} ++#endif /* CONFIG_XEN */ ++ ++static void __init MP_bus_info (struct mpc_config_bus *m) ++{ ++ char str[7]; ++ ++ memcpy(str, m->mpc_bustype, 6); ++ str[6] = 0; ++ ++ mpc_oem_bus_info(m, str, translation_table[mpc_record]); ++ ++ if (m->mpc_busid >= MAX_MP_BUSSES) { ++ printk(KERN_WARNING "MP table busid value (%d) for bustype %s " ++ " is too large, max. supported is %d\n", ++ m->mpc_busid, str, MAX_MP_BUSSES - 1); ++ return; ++ } ++ ++ if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; ++ } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; ++ } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { ++ mpc_oem_pci_bus(m, translation_table[mpc_record]); ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; ++ mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; ++ mp_current_pci_id++; ++ } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; ++ } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) { ++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98; ++ } else { ++ printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); ++ } ++} ++ ++static void __init MP_ioapic_info (struct mpc_config_ioapic *m) ++{ ++ if (!(m->mpc_flags & MPC_APIC_USABLE)) ++ return; ++ ++ printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n", ++ m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); ++ if 
(nr_ioapics >= MAX_IO_APICS) { ++ printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", ++ MAX_IO_APICS, nr_ioapics); ++ panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); ++ } ++ if (!m->mpc_apicaddr) { ++ printk(KERN_ERR "WARNING: bogus zero I/O APIC address" ++ " found in MP table, skipping!\n"); ++ return; ++ } ++ mp_ioapics[nr_ioapics] = *m; ++ nr_ioapics++; ++} ++ ++static void __init MP_intsrc_info (struct mpc_config_intsrc *m) ++{ ++ mp_irqs [mp_irq_entries] = *m; ++ Dprintk("Int: type %d, pol %d, trig %d, bus %d," ++ " IRQ %02x, APIC ID %x, APIC INT %02x\n", ++ m->mpc_irqtype, m->mpc_irqflag & 3, ++ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, ++ m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); ++ if (++mp_irq_entries == MAX_IRQ_SOURCES) ++ panic("Max # of irq sources exceeded!!\n"); ++} ++ ++static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) ++{ ++ Dprintk("Lint: type %d, pol %d, trig %d, bus %d," ++ " IRQ %02x, APIC ID %x, APIC LINT %02x\n", ++ m->mpc_irqtype, m->mpc_irqflag & 3, ++ (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, ++ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); ++ /* ++ * Well it seems all SMP boards in existence ++ * use ExtINT/LVT1 == LINT0 and ++ * NMI/LVT2 == LINT1 - the following check ++ * will show us if this assumptions is false. ++ * Until then we do not have to add baggage. 
++ */ ++ if ((m->mpc_irqtype == mp_ExtINT) && ++ (m->mpc_destapiclint != 0)) ++ BUG(); ++ if ((m->mpc_irqtype == mp_NMI) && ++ (m->mpc_destapiclint != 1)) ++ BUG(); ++} ++ ++#ifdef CONFIG_X86_NUMAQ ++static void __init MP_translation_info (struct mpc_config_translation *m) ++{ ++ printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local); ++ ++ if (mpc_record >= MAX_MPC_ENTRY) ++ printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); ++ else ++ translation_table[mpc_record] = m; /* stash this for later */ ++ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) ++ node_set_online(m->trans_quad); ++} ++ ++/* ++ * Read/parse the MPC oem tables ++ */ ++ ++static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \ ++ unsigned short oemsize) ++{ ++ int count = sizeof (*oemtable); /* the header size */ ++ unsigned char *oemptr = ((unsigned char *)oemtable)+count; ++ ++ mpc_record = 0; ++ printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); ++ if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4)) ++ { ++ printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", ++ oemtable->oem_signature[0], ++ oemtable->oem_signature[1], ++ oemtable->oem_signature[2], ++ oemtable->oem_signature[3]); ++ return; ++ } ++ if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length)) ++ { ++ printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); ++ return; ++ } ++ while (count < oemtable->oem_length) { ++ switch (*oemptr) { ++ case MP_TRANSLATION: ++ { ++ struct mpc_config_translation *m= ++ (struct mpc_config_translation *)oemptr; ++ MP_translation_info(m); ++ oemptr += sizeof(*m); ++ count += sizeof(*m); ++ ++mpc_record; ++ break; ++ } ++ default: ++ { ++ printk(KERN_WARNING "Unrecognised OEM table entry type! 
- %d\n", (int) *oemptr); ++ return; ++ } ++ } ++ } ++} ++ ++static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, ++ char *productid) ++{ ++ if (strncmp(oem, "IBM NUMA", 8)) ++ printk("Warning! May not be a NUMA-Q system!\n"); ++ if (mpc->mpc_oemptr) ++ smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, ++ mpc->mpc_oemsize); ++} ++#endif /* CONFIG_X86_NUMAQ */ ++ ++/* ++ * Read/parse the MPC ++ */ ++ ++static int __init smp_read_mpc(struct mp_config_table *mpc) ++{ ++ char str[16]; ++ char oem[10]; ++ int count=sizeof(*mpc); ++ unsigned char *mpt=((unsigned char *)mpc)+count; ++ ++ if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { ++ printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n", ++ *(u32 *)mpc->mpc_signature); ++ return 0; ++ } ++ if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { ++ printk(KERN_ERR "SMP mptable: checksum error!\n"); ++ return 0; ++ } ++ if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { ++ printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", ++ mpc->mpc_spec); ++ return 0; ++ } ++ if (!mpc->mpc_lapic) { ++ printk(KERN_ERR "SMP mptable: null local APIC address!\n"); ++ return 0; ++ } ++ memcpy(oem,mpc->mpc_oem,8); ++ oem[8]=0; ++ printk(KERN_INFO "OEM ID: %s ",oem); ++ ++ memcpy(str,mpc->mpc_productid,12); ++ str[12]=0; ++ printk("Product ID: %s ",str); ++ ++ mps_oem_check(mpc, oem, str); ++ ++ printk("APIC at: 0x%lX\n",mpc->mpc_lapic); ++ ++ /* ++ * Save the local APIC address (it might be non-default) -- but only ++ * if we're not using ACPI. ++ */ ++ if (!acpi_lapic) ++ mp_lapic_addr = mpc->mpc_lapic; ++ ++ /* ++ * Now process the configuration blocks. 
++ */ ++ mpc_record = 0; ++ while (count < mpc->mpc_length) { ++ switch(*mpt) { ++ case MP_PROCESSOR: ++ { ++ struct mpc_config_processor *m= ++ (struct mpc_config_processor *)mpt; ++ /* ACPI may have already provided this data */ ++ if (!acpi_lapic) ++ MP_processor_info(m); ++ mpt += sizeof(*m); ++ count += sizeof(*m); ++ break; ++ } ++ case MP_BUS: ++ { ++ struct mpc_config_bus *m= ++ (struct mpc_config_bus *)mpt; ++ MP_bus_info(m); ++ mpt += sizeof(*m); ++ count += sizeof(*m); ++ break; ++ } ++ case MP_IOAPIC: ++ { ++ struct mpc_config_ioapic *m= ++ (struct mpc_config_ioapic *)mpt; ++ MP_ioapic_info(m); ++ mpt+=sizeof(*m); ++ count+=sizeof(*m); ++ break; ++ } ++ case MP_INTSRC: ++ { ++ struct mpc_config_intsrc *m= ++ (struct mpc_config_intsrc *)mpt; ++ ++ MP_intsrc_info(m); ++ mpt+=sizeof(*m); ++ count+=sizeof(*m); ++ break; ++ } ++ case MP_LINTSRC: ++ { ++ struct mpc_config_lintsrc *m= ++ (struct mpc_config_lintsrc *)mpt; ++ MP_lintsrc_info(m); ++ mpt+=sizeof(*m); ++ count+=sizeof(*m); ++ break; ++ } ++ default: ++ { ++ count = mpc->mpc_length; ++ break; ++ } ++ } ++ ++mpc_record; ++ } ++ clustered_apic_check(); ++ if (!num_processors) ++ printk(KERN_ERR "SMP mptable: no processors registered!\n"); ++ return num_processors; ++} ++ ++static int __init ELCR_trigger(unsigned int irq) ++{ ++ unsigned int port; ++ ++ port = 0x4d0 + (irq >> 3); ++ return (inb(port) >> (irq & 7)) & 1; ++} ++ ++static void __init construct_default_ioirq_mptable(int mpc_default_type) ++{ ++ struct mpc_config_intsrc intsrc; ++ int i; ++ int ELCR_fallback = 0; ++ ++ intsrc.mpc_type = MP_INTSRC; ++ intsrc.mpc_irqflag = 0; /* conforming */ ++ intsrc.mpc_srcbus = 0; ++ intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; ++ ++ intsrc.mpc_irqtype = mp_INT; ++ ++ /* ++ * If true, we have an ISA/PCI system with no IRQ entries ++ * in the MP table. To prevent the PCI interrupts from being set up ++ * incorrectly, we try to use the ELCR. 
The sanity check to see if ++ * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can ++ * never be level sensitive, so we simply see if the ELCR agrees. ++ * If it does, we assume it's valid. ++ */ ++ if (mpc_default_type == 5) { ++ printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); ++ ++ if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) ++ printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n"); ++ else { ++ printk(KERN_INFO "Using ELCR to identify PCI interrupts\n"); ++ ELCR_fallback = 1; ++ } ++ } ++ ++ for (i = 0; i < 16; i++) { ++ switch (mpc_default_type) { ++ case 2: ++ if (i == 0 || i == 13) ++ continue; /* IRQ0 & IRQ13 not connected */ ++ /* fall through */ ++ default: ++ if (i == 2) ++ continue; /* IRQ2 is never connected */ ++ } ++ ++ if (ELCR_fallback) { ++ /* ++ * If the ELCR indicates a level-sensitive interrupt, we ++ * copy that information over to the MP table in the ++ * irqflag field (level sensitive, active high polarity). ++ */ ++ if (ELCR_trigger(i)) ++ intsrc.mpc_irqflag = 13; ++ else ++ intsrc.mpc_irqflag = 0; ++ } ++ ++ intsrc.mpc_srcbusirq = i; ++ intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ ++ MP_intsrc_info(&intsrc); ++ } ++ ++ intsrc.mpc_irqtype = mp_ExtINT; ++ intsrc.mpc_srcbusirq = 0; ++ intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ ++ MP_intsrc_info(&intsrc); ++} ++ ++static inline void __init construct_default_ISA_mptable(int mpc_default_type) ++{ ++ struct mpc_config_processor processor; ++ struct mpc_config_bus bus; ++ struct mpc_config_ioapic ioapic; ++ struct mpc_config_lintsrc lintsrc; ++ int linttypes[2] = { mp_ExtINT, mp_NMI }; ++ int i; ++ ++ /* ++ * local APIC has default address ++ */ ++ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; ++ ++ /* ++ * 2 CPUs, numbered 0 & 1. ++ */ ++ processor.mpc_type = MP_PROCESSOR; ++ /* Either an integrated APIC or a discrete 82489DX. */ ++ processor.mpc_apicver = mpc_default_type > 4 ? 
0x10 : 0x01; ++ processor.mpc_cpuflag = CPU_ENABLED; ++ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | ++ (boot_cpu_data.x86_model << 4) | ++ boot_cpu_data.x86_mask; ++ processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; ++ processor.mpc_reserved[0] = 0; ++ processor.mpc_reserved[1] = 0; ++ for (i = 0; i < 2; i++) { ++ processor.mpc_apicid = i; ++ MP_processor_info(&processor); ++ } ++ ++ bus.mpc_type = MP_BUS; ++ bus.mpc_busid = 0; ++ switch (mpc_default_type) { ++ default: ++ printk("???\n"); ++ printk(KERN_ERR "Unknown standard configuration %d\n", ++ mpc_default_type); ++ /* fall through */ ++ case 1: ++ case 5: ++ memcpy(bus.mpc_bustype, "ISA ", 6); ++ break; ++ case 2: ++ case 6: ++ case 3: ++ memcpy(bus.mpc_bustype, "EISA ", 6); ++ break; ++ case 4: ++ case 7: ++ memcpy(bus.mpc_bustype, "MCA ", 6); ++ } ++ MP_bus_info(&bus); ++ if (mpc_default_type > 4) { ++ bus.mpc_busid = 1; ++ memcpy(bus.mpc_bustype, "PCI ", 6); ++ MP_bus_info(&bus); ++ } ++ ++ ioapic.mpc_type = MP_IOAPIC; ++ ioapic.mpc_apicid = 2; ++ ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; ++ ioapic.mpc_flags = MPC_APIC_USABLE; ++ ioapic.mpc_apicaddr = 0xFEC00000; ++ MP_ioapic_info(&ioapic); ++ ++ /* ++ * We set up most of the low 16 IO-APIC pins according to MPS rules. ++ */ ++ construct_default_ioirq_mptable(mpc_default_type); ++ ++ lintsrc.mpc_type = MP_LINTSRC; ++ lintsrc.mpc_irqflag = 0; /* conforming */ ++ lintsrc.mpc_srcbusid = 0; ++ lintsrc.mpc_srcbusirq = 0; ++ lintsrc.mpc_destapic = MP_APIC_ALL; ++ for (i = 0; i < 2; i++) { ++ lintsrc.mpc_irqtype = linttypes[i]; ++ lintsrc.mpc_destapiclint = i; ++ MP_lintsrc_info(&lintsrc); ++ } ++} ++ ++static struct intel_mp_floating *mpf_found; ++ ++/* ++ * Scan the memory blocks for an SMP configuration block. ++ */ ++void __init get_smp_config (void) ++{ ++ struct intel_mp_floating *mpf = mpf_found; ++ ++ /* ++ * ACPI supports both logical (e.g. 
Hyper-Threading) and physical ++ * processors, where MPS only supports physical. ++ */ ++ if (acpi_lapic && acpi_ioapic) { ++ printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n"); ++ return; ++ } ++ else if (acpi_lapic) ++ printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); ++ ++ printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); ++ if (mpf->mpf_feature2 & (1<<7)) { ++ printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); ++ pic_mode = 1; ++ } else { ++ printk(KERN_INFO " Virtual Wire compatibility mode.\n"); ++ pic_mode = 0; ++ } ++ ++ /* ++ * Now see if we need to read further. ++ */ ++ if (mpf->mpf_feature1 != 0) { ++ ++ printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1); ++ construct_default_ISA_mptable(mpf->mpf_feature1); ++ ++ } else if (mpf->mpf_physptr) { ++ ++ /* ++ * Read the physical hardware table. Anything here will ++ * override the defaults. ++ */ ++ if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) { ++ smp_found_config = 0; ++ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); ++ printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); ++ return; ++ } ++ /* ++ * If there are no explicit MP IRQ entries, then we are ++ * broken. We set up most of the low 16 IO-APIC pins to ++ * ISA defaults and hope it will work. ++ */ ++ if (!mp_irq_entries) { ++ struct mpc_config_bus bus; ++ ++ printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); ++ ++ bus.mpc_type = MP_BUS; ++ bus.mpc_busid = 0; ++ memcpy(bus.mpc_bustype, "ISA ", 6); ++ MP_bus_info(&bus); ++ ++ construct_default_ioirq_mptable(0); ++ } ++ ++ } else ++ BUG(); ++ ++ printk(KERN_INFO "Processors: %d\n", num_processors); ++ /* ++ * Only use the first configuration found. 
++ */ ++} ++ ++static int __init smp_scan_config (unsigned long base, unsigned long length) ++{ ++ unsigned long *bp = isa_bus_to_virt(base); ++ struct intel_mp_floating *mpf; ++ ++ Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); ++ if (sizeof(*mpf) != 16) ++ printk("Error: MPF size\n"); ++ ++ while (length > 0) { ++ mpf = (struct intel_mp_floating *)bp; ++ if ((*bp == SMP_MAGIC_IDENT) && ++ (mpf->mpf_length == 1) && ++ !mpf_checksum((unsigned char *)bp, 16) && ++ ((mpf->mpf_specification == 1) ++ || (mpf->mpf_specification == 4)) ) { ++ ++ smp_found_config = 1; ++#ifndef CONFIG_XEN ++ printk(KERN_INFO "found SMP MP-table at %08lx\n", ++ virt_to_phys(mpf)); ++ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); ++ if (mpf->mpf_physptr) { ++ /* ++ * We cannot access to MPC table to compute ++ * table size yet, as only few megabytes from ++ * the bottom is mapped now. ++ * PC-9800's MPC table places on the very last ++ * of physical memory; so that simply reserving ++ * PAGE_SIZE from mpg->mpf_physptr yields BUG() ++ * in reserve_bootmem. ++ */ ++ unsigned long size = PAGE_SIZE; ++ unsigned long end = max_low_pfn * PAGE_SIZE; ++ if (mpf->mpf_physptr + size > end) ++ size = end - mpf->mpf_physptr; ++ reserve_bootmem(mpf->mpf_physptr, size); ++ } ++#else ++ printk(KERN_INFO "found SMP MP-table at %08lx\n", ++ ((unsigned long)bp - (unsigned long)isa_bus_to_virt(base)) + base); ++#endif ++ ++ mpf_found = mpf; ++ return 1; ++ } ++ bp += 4; ++ length -= 16; ++ } ++ return 0; ++} ++ ++void __init find_smp_config (void) ++{ ++#ifndef CONFIG_XEN ++ unsigned int address; ++#endif ++ ++ /* ++ * FIXME: Linux assumes you have 640K of base ram.. ++ * this continues the error... 
++ * ++ * 1) Scan the bottom 1K for a signature ++ * 2) Scan the top 1K of base RAM ++ * 3) Scan the 64K of bios ++ */ ++ if (smp_scan_config(0x0,0x400) || ++ smp_scan_config(639*0x400,0x400) || ++ smp_scan_config(0xF0000,0x10000)) ++ return; ++ /* ++ * If it is an SMP machine we should know now, unless the ++ * configuration is in an EISA/MCA bus machine with an ++ * extended bios data area. ++ * ++ * there is a real-mode segmented pointer pointing to the ++ * 4K EBDA area at 0x40E, calculate and scan it here. ++ * ++ * NOTE! There are Linux loaders that will corrupt the EBDA ++ * area, and as such this kind of SMP config may be less ++ * trustworthy, simply because the SMP table may have been ++ * stomped on during early boot. These loaders are buggy and ++ * should be fixed. ++ * ++ * MP1.4 SPEC states to only scan first 1K of 4K EBDA. ++ */ ++ ++#ifndef CONFIG_XEN ++ address = get_bios_ebda(); ++ if (address) ++ smp_scan_config(address, 0x400); ++#endif ++} ++ ++int es7000_plat; ++ ++/* -------------------------------------------------------------------------- ++ ACPI-based MP Configuration ++ -------------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_ACPI ++ ++void __init mp_register_lapic_address ( ++ u64 address) ++{ ++#ifndef CONFIG_XEN ++ mp_lapic_addr = (unsigned long) address; ++ ++ set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); ++ ++ if (boot_cpu_physical_apicid == -1U) ++ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); ++ ++ Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); ++#endif ++} ++ ++ ++void __devinit mp_register_lapic ( ++ u8 id, ++ u8 enabled) ++{ ++ struct mpc_config_processor processor; ++ int boot_cpu = 0; ++ ++ if (MAX_APICS - id <= 0) { ++ printk(KERN_WARNING "Processor #%d invalid (max %d)\n", ++ id, MAX_APICS); ++ return; ++ } ++ ++ if (id == boot_cpu_physical_apicid) ++ boot_cpu = 1; ++ ++#ifndef CONFIG_XEN ++ processor.mpc_type = MP_PROCESSOR; ++ processor.mpc_apicid = id; ++ 
processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); ++ processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); ++ processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); ++ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | ++ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; ++ processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; ++ processor.mpc_reserved[0] = 0; ++ processor.mpc_reserved[1] = 0; ++#endif ++ ++ MP_processor_info(&processor); ++} ++ ++#ifdef CONFIG_X86_IO_APIC ++ ++#define MP_ISA_BUS 0 ++#define MP_MAX_IOAPIC_PIN 127 ++ ++static struct mp_ioapic_routing { ++ int apic_id; ++ int gsi_base; ++ int gsi_end; ++ u32 pin_programmed[4]; ++} mp_ioapic_routing[MAX_IO_APICS]; ++ ++ ++static int mp_find_ioapic ( ++ int gsi) ++{ ++ int i = 0; ++ ++ /* Find the IOAPIC that manages this GSI. */ ++ for (i = 0; i < nr_ioapics; i++) { ++ if ((gsi >= mp_ioapic_routing[i].gsi_base) ++ && (gsi <= mp_ioapic_routing[i].gsi_end)) ++ return i; ++ } ++ ++ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); ++ ++ return -1; ++} ++ ++ ++void __init mp_register_ioapic ( ++ u8 id, ++ u32 address, ++ u32 gsi_base) ++{ ++ int idx = 0; ++ int tmpid; ++ ++ if (nr_ioapics >= MAX_IO_APICS) { ++ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " ++ "(found %d)\n", MAX_IO_APICS, nr_ioapics); ++ panic("Recompile kernel with bigger MAX_IO_APICS!\n"); ++ } ++ if (!address) { ++ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" ++ " found in MADT table, skipping!\n"); ++ return; ++ } ++ ++ idx = nr_ioapics++; ++ ++ mp_ioapics[idx].mpc_type = MP_IOAPIC; ++ mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; ++ mp_ioapics[idx].mpc_apicaddr = address; ++ ++#ifndef CONFIG_XEN ++ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); ++#endif ++ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) ++ tmpid = io_apic_get_unique_id(idx, id); ++ else ++ tmpid = id; ++ if (tmpid == -1) { 
++ nr_ioapics--; ++ return; ++ } ++ mp_ioapics[idx].mpc_apicid = tmpid; ++ mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); ++ ++ /* ++ * Build basic GSI lookup table to facilitate gsi->io_apic lookups ++ * and to prevent reprogramming of IOAPIC pins (PCI GSIs). ++ */ ++ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; ++ mp_ioapic_routing[idx].gsi_base = gsi_base; ++ mp_ioapic_routing[idx].gsi_end = gsi_base + ++ io_apic_get_redir_entries(idx); ++ ++ printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " ++ "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, ++ mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, ++ mp_ioapic_routing[idx].gsi_base, ++ mp_ioapic_routing[idx].gsi_end); ++ ++ return; ++} ++ ++ ++void __init mp_override_legacy_irq ( ++ u8 bus_irq, ++ u8 polarity, ++ u8 trigger, ++ u32 gsi) ++{ ++ struct mpc_config_intsrc intsrc; ++ int ioapic = -1; ++ int pin = -1; ++ ++ /* ++ * Convert 'gsi' to 'ioapic.pin'. ++ */ ++ ioapic = mp_find_ioapic(gsi); ++ if (ioapic < 0) ++ return; ++ pin = gsi - mp_ioapic_routing[ioapic].gsi_base; ++ ++ /* ++ * TBD: This check is for faulty timer entries, where the override ++ * erroneously sets the trigger to level, resulting in a HUGE ++ * increase of timer interrupts! 
++ */ ++ if ((bus_irq == 0) && (trigger == 3)) ++ trigger = 1; ++ ++ intsrc.mpc_type = MP_INTSRC; ++ intsrc.mpc_irqtype = mp_INT; ++ intsrc.mpc_irqflag = (trigger << 2) | polarity; ++ intsrc.mpc_srcbus = MP_ISA_BUS; ++ intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ ++ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ ++ intsrc.mpc_dstirq = pin; /* INTIN# */ ++ ++ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", ++ intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, ++ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, ++ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); ++ ++ mp_irqs[mp_irq_entries] = intsrc; ++ if (++mp_irq_entries == MAX_IRQ_SOURCES) ++ panic("Max # of irq sources exceeded!\n"); ++ ++ return; ++} ++ ++void __init mp_config_acpi_legacy_irqs (void) ++{ ++ struct mpc_config_intsrc intsrc; ++ int i = 0; ++ int ioapic = -1; ++ ++ /* ++ * Fabricate the legacy ISA bus (bus #31). ++ */ ++ mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; ++ Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); ++ ++ /* ++ * Older generations of ES7000 have no legacy identity mappings ++ */ ++ if (es7000_plat == 1) ++ return; ++ ++ /* ++ * Locate the IOAPIC that manages the ISA IRQs (0-15). ++ */ ++ ioapic = mp_find_ioapic(0); ++ if (ioapic < 0) ++ return; ++ ++ intsrc.mpc_type = MP_INTSRC; ++ intsrc.mpc_irqflag = 0; /* Conforming */ ++ intsrc.mpc_srcbus = MP_ISA_BUS; ++ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; ++ ++ /* ++ * Use the default configuration for the IRQs 0-15. Unless ++ * overriden by (MADT) interrupt source override entries. ++ */ ++ for (i = 0; i < 16; i++) { ++ int idx; ++ ++ for (idx = 0; idx < mp_irq_entries; idx++) { ++ struct mpc_config_intsrc *irq = mp_irqs + idx; ++ ++ /* Do we already have a mapping for this ISA IRQ? 
*/ ++ if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i) ++ break; ++ ++ /* Do we already have a mapping for this IOAPIC pin */ ++ if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && ++ (irq->mpc_dstirq == i)) ++ break; ++ } ++ ++ if (idx != mp_irq_entries) { ++ printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); ++ continue; /* IRQ already used */ ++ } ++ ++ intsrc.mpc_irqtype = mp_INT; ++ intsrc.mpc_srcbusirq = i; /* Identity mapped */ ++ intsrc.mpc_dstirq = i; ++ ++ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " ++ "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, ++ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, ++ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, ++ intsrc.mpc_dstirq); ++ ++ mp_irqs[mp_irq_entries] = intsrc; ++ if (++mp_irq_entries == MAX_IRQ_SOURCES) ++ panic("Max # of irq sources exceeded!\n"); ++ } ++} ++ ++#define MAX_GSI_NUM 4096 ++ ++int mp_register_gsi (u32 gsi, int triggering, int polarity) ++{ ++ int ioapic = -1; ++ int ioapic_pin = 0; ++ int idx, bit = 0; ++ static int pci_irq = 16; ++ /* ++ * Mapping between Global System Interrups, which ++ * represent all possible interrupts, and IRQs ++ * assigned to actual devices. ++ */ ++ static int gsi_to_irq[MAX_GSI_NUM]; ++ ++ /* Don't set up the ACPI SCI because it's already set up */ ++ if (acpi_fadt.sci_int == gsi) ++ return gsi; ++ ++ ioapic = mp_find_ioapic(gsi); ++ if (ioapic < 0) { ++ printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); ++ return gsi; ++ } ++ ++ ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; ++ ++ if (ioapic_renumber_irq) ++ gsi = ioapic_renumber_irq(ioapic, gsi); ++ ++ /* ++ * Avoid pin reprogramming. PRTs typically include entries ++ * with redundant pin->gsi mappings (but unique PCI devices); ++ * we only program the IOAPIC on the first. ++ */ ++ bit = ioapic_pin % 32; ++ idx = (ioapic_pin < 32) ? 
0 : (ioapic_pin / 32); ++ if (idx > 3) { ++ printk(KERN_ERR "Invalid reference to IOAPIC pin " ++ "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, ++ ioapic_pin); ++ return gsi; ++ } ++ if ((1< 15), but ++ * avoid a problem where the 8254 timer (IRQ0) is setup ++ * via an override (so it's not on pin 0 of the ioapic), ++ * and at the same time, the pin 0 interrupt is a PCI ++ * type. The gsi > 15 test could cause these two pins ++ * to be shared as IRQ0, and they are not shareable. ++ * So test for this condition, and if necessary, avoid ++ * the pin collision. ++ */ ++ if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) ++ gsi = pci_irq++; ++ /* ++ * Don't assign IRQ used by ACPI SCI ++ */ ++ if (gsi == acpi_fadt.sci_int) ++ gsi = pci_irq++; ++ gsi_to_irq[irq] = gsi; ++ } else { ++ printk(KERN_ERR "GSI %u is too high\n", gsi); ++ return gsi; ++ } ++ } ++ ++ io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, ++ triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, ++ polarity == ACPI_ACTIVE_HIGH ? 0 : 1); ++ return gsi; ++} ++ ++#endif /* CONFIG_X86_IO_APIC */ ++#endif /* CONFIG_ACPI */ +Index: head-2008-11-25/arch/x86/kernel/pci-dma-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/pci-dma-xen.c 2008-10-29 09:55:56.000000000 +0100 +@@ -0,0 +1,409 @@ ++/* ++ * Dynamic DMA mapping support. ++ * ++ * On i386 there is no hardware dynamic DMA address translation, ++ * so consistent alloc/free are merely page allocation/freeing. ++ * The rest of the dynamic DMA mapping interface is implemented ++ * in asm/pci.h. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __x86_64__ ++#include ++ ++int iommu_merge __read_mostly = 0; ++EXPORT_SYMBOL(iommu_merge); ++ ++dma_addr_t bad_dma_address __read_mostly; ++EXPORT_SYMBOL(bad_dma_address); ++ ++/* This tells the BIO block layer to assume merging. Default to off ++ because we cannot guarantee merging later. */ ++int iommu_bio_merge __read_mostly = 0; ++EXPORT_SYMBOL(iommu_bio_merge); ++ ++int force_iommu __read_mostly= 0; ++ ++__init int iommu_setup(char *p) ++{ ++ return 1; ++} ++ ++void __init pci_iommu_alloc(void) ++{ ++#ifdef CONFIG_SWIOTLB ++ pci_swiotlb_init(); ++#endif ++} ++ ++static int __init pci_iommu_init(void) ++{ ++ no_iommu_init(); ++ return 0; ++} ++ ++/* Must execute after PCI subsystem */ ++fs_initcall(pci_iommu_init); ++#endif ++ ++struct dma_coherent_mem { ++ void *virt_base; ++ u32 device_base; ++ int size; ++ int flags; ++ unsigned long *bitmap; ++}; ++ ++#define IOMMU_BUG_ON(test) \ ++do { \ ++ if (unlikely(test)) { \ ++ printk(KERN_ALERT "Fatal DMA error! 
" \ ++ "Please use 'swiotlb=force'\n"); \ ++ BUG(); \ ++ } \ ++} while (0) ++ ++static int check_pages_physically_contiguous(unsigned long pfn, ++ unsigned int offset, ++ size_t length) ++{ ++ unsigned long next_mfn; ++ int i; ++ int nr_pages; ++ ++ next_mfn = pfn_to_mfn(pfn); ++ nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT; ++ ++ for (i = 1; i < nr_pages; i++) { ++ if (pfn_to_mfn(++pfn) != ++next_mfn) ++ return 0; ++ } ++ return 1; ++} ++ ++int range_straddles_page_boundary(paddr_t p, size_t size) ++{ ++ unsigned long pfn = p >> PAGE_SHIFT; ++ unsigned int offset = p & ~PAGE_MASK; ++ ++ return ((offset + size > PAGE_SIZE) && ++ !check_pages_physically_contiguous(pfn, offset, size)); ++} ++ ++int ++dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, ++ enum dma_data_direction direction) ++{ ++ int i, rc; ++ ++ if (direction == DMA_NONE) ++ BUG(); ++ WARN_ON(nents == 0 || sg[0].length == 0); ++ ++ if (swiotlb) { ++ rc = swiotlb_map_sg(hwdev, sg, nents, direction); ++ } else { ++ for (i = 0; i < nents; i++ ) { ++ BUG_ON(!sg[i].page); ++ sg[i].dma_address = ++ gnttab_dma_map_page(sg[i].page) + sg[i].offset; ++ sg[i].dma_length = sg[i].length; ++ IOMMU_BUG_ON(address_needs_mapping( ++ hwdev, sg[i].dma_address)); ++ IOMMU_BUG_ON(range_straddles_page_boundary( ++ page_to_pseudophys(sg[i].page) + sg[i].offset, ++ sg[i].length)); ++ } ++ rc = nents; ++ } ++ ++ flush_write_buffers(); ++ return rc; ++} ++EXPORT_SYMBOL(dma_map_sg); ++ ++void ++dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, ++ enum dma_data_direction direction) ++{ ++ int i; ++ ++ BUG_ON(direction == DMA_NONE); ++ if (swiotlb) ++ swiotlb_unmap_sg(hwdev, sg, nents, direction); ++ else { ++ for (i = 0; i < nents; i++ ) ++ gnttab_dma_unmap_page(sg[i].dma_address); ++ } ++} ++EXPORT_SYMBOL(dma_unmap_sg); ++ ++#ifdef CONFIG_HIGHMEM ++dma_addr_t ++dma_map_page(struct device *dev, struct page *page, unsigned long offset, ++ size_t size, enum dma_data_direction 
direction) ++{ ++ dma_addr_t dma_addr; ++ ++ BUG_ON(direction == DMA_NONE); ++ ++ if (swiotlb) { ++ dma_addr = swiotlb_map_page( ++ dev, page, offset, size, direction); ++ } else { ++ dma_addr = gnttab_dma_map_page(page) + offset; ++ IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr)); ++ } ++ ++ return dma_addr; ++} ++EXPORT_SYMBOL(dma_map_page); ++ ++void ++dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, ++ enum dma_data_direction direction) ++{ ++ BUG_ON(direction == DMA_NONE); ++ if (swiotlb) ++ swiotlb_unmap_page(dev, dma_address, size, direction); ++ else ++ gnttab_dma_unmap_page(dma_address); ++} ++EXPORT_SYMBOL(dma_unmap_page); ++#endif /* CONFIG_HIGHMEM */ ++ ++int ++dma_mapping_error(dma_addr_t dma_addr) ++{ ++ if (swiotlb) ++ return swiotlb_dma_mapping_error(dma_addr); ++ return 0; ++} ++EXPORT_SYMBOL(dma_mapping_error); ++ ++int ++dma_supported(struct device *dev, u64 mask) ++{ ++ if (swiotlb) ++ return swiotlb_dma_supported(dev, mask); ++ /* ++ * By default we'll BUG when an infeasible DMA is requested, and ++ * request swiotlb=force (see IOMMU_BUG_ON). ++ */ ++ return 1; ++} ++EXPORT_SYMBOL(dma_supported); ++ ++void *dma_alloc_coherent(struct device *dev, size_t size, ++ dma_addr_t *dma_handle, gfp_t gfp) ++{ ++ void *ret; ++ struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; ++ unsigned int order = get_order(size); ++ unsigned long vstart; ++ u64 mask; ++ ++ /* ignore region specifiers */ ++ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); ++ ++ if (mem) { ++ int page = bitmap_find_free_region(mem->bitmap, mem->size, ++ order); ++ if (page >= 0) { ++ *dma_handle = mem->device_base + (page << PAGE_SHIFT); ++ ret = mem->virt_base + (page << PAGE_SHIFT); ++ memset(ret, 0, size); ++ return ret; ++ } ++ if (mem->flags & DMA_MEMORY_EXCLUSIVE) ++ return NULL; ++ } ++ ++ if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) ++ gfp |= GFP_DMA; ++ ++ vstart = __get_free_pages(gfp, order); ++ ret = (void *)vstart; ++ ++ if (dev != NULL && dev->coherent_dma_mask) ++ mask = dev->coherent_dma_mask; ++ else ++ mask = 0xffffffff; ++ ++ if (ret != NULL) { ++ if (xen_create_contiguous_region(vstart, order, ++ fls64(mask)) != 0) { ++ free_pages(vstart, order); ++ return NULL; ++ } ++ memset(ret, 0, size); ++ *dma_handle = virt_to_bus(ret); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(dma_alloc_coherent); ++ ++void dma_free_coherent(struct device *dev, size_t size, ++ void *vaddr, dma_addr_t dma_handle) ++{ ++ struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; ++ int order = get_order(size); ++ ++ if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { ++ int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; ++ ++ bitmap_release_region(mem->bitmap, page, order); ++ } else { ++ xen_destroy_contiguous_region((unsigned long)vaddr, order); ++ free_pages((unsigned long)vaddr, order); ++ } ++} ++EXPORT_SYMBOL(dma_free_coherent); ++ ++#ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY ++int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, ++ dma_addr_t device_addr, size_t size, int flags) ++{ ++ void __iomem *mem_base; ++ int pages = size >> PAGE_SHIFT; ++ int bitmap_size = (pages + 31)/32; ++ ++ if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) ++ goto out; ++ if (!size) ++ goto out; ++ if (dev->dma_mem) ++ goto out; ++ ++ /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ ++ ++ mem_base = ioremap(bus_addr, size); ++ if (!mem_base) ++ goto out; ++ ++ dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); ++ if (!dev->dma_mem) ++ goto out; ++ memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); ++ dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); ++ if (!dev->dma_mem->bitmap) ++ goto free1_out; ++ memset(dev->dma_mem->bitmap, 0, bitmap_size); ++ ++ dev->dma_mem->virt_base = mem_base; ++ dev->dma_mem->device_base = device_addr; ++ dev->dma_mem->size = pages; ++ dev->dma_mem->flags = flags; ++ ++ if (flags & DMA_MEMORY_MAP) ++ return DMA_MEMORY_MAP; ++ ++ return DMA_MEMORY_IO; ++ ++ free1_out: ++ kfree(dev->dma_mem->bitmap); ++ out: ++ return 0; ++} ++EXPORT_SYMBOL(dma_declare_coherent_memory); ++ ++void dma_release_declared_memory(struct device *dev) ++{ ++ struct dma_coherent_mem *mem = dev->dma_mem; ++ ++ if(!mem) ++ return; ++ dev->dma_mem = NULL; ++ iounmap(mem->virt_base); ++ kfree(mem->bitmap); ++ kfree(mem); ++} ++EXPORT_SYMBOL(dma_release_declared_memory); ++ ++void *dma_mark_declared_memory_occupied(struct 
device *dev, ++ dma_addr_t device_addr, size_t size) ++{ ++ struct dma_coherent_mem *mem = dev->dma_mem; ++ int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ int pos, err; ++ ++ if (!mem) ++ return ERR_PTR(-EINVAL); ++ ++ pos = (device_addr - mem->device_base) >> PAGE_SHIFT; ++ err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); ++ if (err != 0) ++ return ERR_PTR(err); ++ return mem->virt_base + (pos << PAGE_SHIFT); ++} ++EXPORT_SYMBOL(dma_mark_declared_memory_occupied); ++#endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */ ++ ++dma_addr_t ++dma_map_single(struct device *dev, void *ptr, size_t size, ++ enum dma_data_direction direction) ++{ ++ dma_addr_t dma; ++ ++ if (direction == DMA_NONE) ++ BUG(); ++ WARN_ON(size == 0); ++ ++ if (swiotlb) { ++ dma = swiotlb_map_single(dev, ptr, size, direction); ++ } else { ++ dma = gnttab_dma_map_page(virt_to_page(ptr)) + ++ offset_in_page(ptr); ++ IOMMU_BUG_ON(range_straddles_page_boundary(__pa(ptr), size)); ++ IOMMU_BUG_ON(address_needs_mapping(dev, dma)); ++ } ++ ++ flush_write_buffers(); ++ return dma; ++} ++EXPORT_SYMBOL(dma_map_single); ++ ++void ++dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, ++ enum dma_data_direction direction) ++{ ++ if (direction == DMA_NONE) ++ BUG(); ++ if (swiotlb) ++ swiotlb_unmap_single(dev, dma_addr, size, direction); ++ else ++ gnttab_dma_unmap_page(dma_addr); ++} ++EXPORT_SYMBOL(dma_unmap_single); ++ ++void ++dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, ++ enum dma_data_direction direction) ++{ ++ if (swiotlb) ++ swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction); ++} ++EXPORT_SYMBOL(dma_sync_single_for_cpu); ++ ++void ++dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, ++ enum dma_data_direction direction) ++{ ++ if (swiotlb) ++ swiotlb_sync_single_for_device(dev, dma_handle, size, direction); ++} ++EXPORT_SYMBOL(dma_sync_single_for_device); 
+Index: head-2008-11-25/arch/x86/kernel/process_32-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ head-2008-11-25/arch/x86/kernel/process_32-xen.c 2008-07-21 11:00:32.000000000 +0200 +@@ -0,0 +1,877 @@ ++/* ++ * linux/arch/i386/kernel/process.c ++ * ++ * Copyright (C) 1995 Linus Torvalds ++ * ++ * Pentium III FXSR, SSE support ++ * Gareth Hughes , May 2000 ++ */ ++ ++/* ++ * This file handles the architecture-dependent parts of process handling.. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_MATH_EMULATION ++#include ++#endif ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++ ++asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); ++ ++static int hlt_counter; ++ ++unsigned long boot_option_idle_override = 0; ++EXPORT_SYMBOL(boot_option_idle_override); ++ ++/* ++ * Return saved PC of a blocked thread. ++ */ ++unsigned long thread_saved_pc(struct task_struct *tsk) ++{ ++ return ((unsigned long *)tsk->thread.esp)[3]; ++} ++ ++/* ++ * Powermanagement idle function, if any.. ++ */ ++void (*pm_idle)(void); ++EXPORT_SYMBOL(pm_idle); ++static DEFINE_PER_CPU(unsigned int, cpu_idle_state); ++ ++void disable_hlt(void) ++{ ++ hlt_counter++; ++} ++ ++EXPORT_SYMBOL(disable_hlt); ++ ++void enable_hlt(void) ++{ ++ hlt_counter--; ++} ++ ++EXPORT_SYMBOL(enable_hlt); ++ ++/* ++ * On SMP it's slightly faster (but much more power-consuming!) ++ * to poll the ->work.need_resched flag instead of waiting for the ++ * cross-CPU IPI to arrive. Use this option with caution. 
++ */ ++static void poll_idle (void) ++{ ++ local_irq_enable(); ++ ++ asm volatile( ++ "2:" ++ "testl %0, %1;" ++ "rep; nop;" ++ "je 2b;" ++ : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); ++} ++ ++static void xen_idle(void) ++{ ++ local_irq_disable(); ++ ++ if (need_resched()) ++ local_irq_enable(); ++ else { ++ current_thread_info()->status &= ~TS_POLLING; ++ smp_mb__after_clear_bit(); ++ safe_halt(); ++ current_thread_info()->status |= TS_POLLING; ++ } ++} ++#ifdef CONFIG_APM_MODULE ++EXPORT_SYMBOL(default_idle); ++#endif ++ ++#ifdef CONFIG_HOTPLUG_CPU ++extern cpumask_t cpu_initialized; ++static inline void play_dead(void) ++{ ++ idle_task_exit(); ++ local_irq_disable(); ++ cpu_clear(smp_processor_id(), cpu_initialized); ++ preempt_enable_no_resched(); ++ VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL)); ++ cpu_bringup(); ++} ++#else ++static inline void play_dead(void) ++{ ++ BUG(); ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++/* ++ * The idle thread. There's no useful work to be ++ * done, so just try to conserve power and have a ++ * low exit latency (ie sit in a loop waiting for ++ * somebody to say that they'd like to reschedule) ++ */ ++void cpu_idle(void) ++{ ++ int cpu = smp_processor_id(); ++ ++ current_thread_info()->status |= TS_POLLING; ++ ++ /* endless idle loop with no priority at all */ ++ while (1) { ++ while (!need_resched()) { ++ void (*idle)(void); ++ ++ if (__get_cpu_var(cpu_idle_state)) ++ __get_cpu_var(cpu_idle_state) = 0; ++ ++ rmb(); ++ idle = xen_idle; /* no alternatives */ ++ ++ if (cpu_is_offline(cpu)) ++ play_dead(); ++ ++ __get_cpu_var(irq_stat).idle_timestamp = jiffies; ++ idle(); ++ } ++ preempt_enable_no_resched(); ++ schedule(); ++ preempt_disable(); ++ } ++} ++ ++void cpu_idle_wait(void) ++{ ++ unsigned int cpu, this_cpu = get_cpu(); ++ cpumask_t map; ++ ++ set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); ++ put_cpu(); ++ ++ cpus_clear(map); ++ for_each_online_cpu(cpu) { ++ 
per_cpu(cpu_idle_state, cpu) = 1; ++ cpu_set(cpu, map); ++ } ++ ++ __get_cpu_var(cpu_idle_state) = 0; ++ ++ wmb(); ++ do { ++ ssleep(1); ++ for_each_online_cpu(cpu) { ++ if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) ++ cpu_clear(cpu, map); ++ } ++ cpus_and(map, map, cpu_online_map); ++ } while (!cpus_empty(map)); ++} ++EXPORT_SYMBOL_GPL(cpu_idle_wait); ++ ++void __devinit select_idle_routine(const struct cpuinfo_x86 *c) ++{ ++} ++ ++static int __init idle_setup (char *str) ++{ ++ if (!strncmp(str, "poll", 4)) { ++ printk("using polling idle threads.\n"); ++ pm_idle = poll_idle; ++ } ++ ++ boot_option_idle_override = 1; ++ return 1; ++} ++ ++__setup("idle=", idle_setup); ++ ++void show_regs(struct pt_regs * regs) ++{ ++ unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; ++ ++ printk("\n"); ++ printk("Pid: %d, comm: %20s\n", current->pid, current->comm); ++ printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); ++ print_symbol("EIP is at %s\n", regs->eip); ++ ++ if (user_mode_vm(regs)) ++ printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); ++ printk(" EFLAGS: %08lx %s (%s %.*s)\n", ++ regs->eflags, print_tainted(), system_utsname.release, ++ (int)strcspn(system_utsname.version, " "), ++ system_utsname.version); ++ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", ++ regs->eax,regs->ebx,regs->ecx,regs->edx); ++ printk("ESI: %08lx EDI: %08lx EBP: %08lx", ++ regs->esi, regs->edi, regs->ebp); ++ printk(" DS: %04x ES: %04x\n", ++ 0xffff & regs->xds,0xffff & regs->xes); ++ ++ cr0 = read_cr0(); ++ cr2 = read_cr2(); ++ cr3 = read_cr3(); ++ cr4 = read_cr4_safe(); ++ printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); ++ show_trace(NULL, regs, ®s->esp); ++} ++ ++/* ++ * This gets run with %ebx containing the ++ * function to call, and %edx containing ++ * the "args". 
++ */ ++extern void kernel_thread_helper(void); ++__asm__(".section .text\n" ++ ".align 4\n" ++ "kernel_thread_helper:\n\t" ++ "movl %edx,%eax\n\t" ++ "pushl %edx\n\t" ++ "call *%ebx\n\t" ++ "pushl %eax\n\t" ++ "call do_exit\n" ++ ".previous"); ++ ++/* ++ * Create a kernel thread ++ */ ++int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) ++{ ++ struct pt_regs regs; ++ ++ memset(®s, 0, sizeof(regs)); ++ ++ regs.ebx = (unsigned long) fn; ++ regs.edx = (unsigned long) arg; ++ ++ regs.xds = __USER_DS; ++ regs.xes = __USER_DS; ++ regs.orig_eax = -1; ++ regs.eip = (unsigned long) kernel_thread_helper; ++ regs.xcs = GET_KERNEL_CS(); ++ regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; ++ ++ /* Ok, create the new process.. */ ++ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); ++} ++EXPORT_SYMBOL(kernel_thread); ++ ++/* ++ * Free current thread data structures etc.. ++ */ ++void exit_thread(void) ++{ ++ /* The process may have allocated an io port bitmap... nuke it. */ ++ if (unlikely(test_thread_flag(TIF_IO_BITMAP))) { ++ struct task_struct *tsk = current; ++ struct thread_struct *t = &tsk->thread; ++ struct physdev_set_iobitmap set_iobitmap; ++ memset(&set_iobitmap, 0, sizeof(set_iobitmap)); ++ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, ++ &set_iobitmap)); ++ kfree(t->io_bitmap_ptr); ++ t->io_bitmap_ptr = NULL; ++ clear_thread_flag(TIF_IO_BITMAP); ++ } ++} ++ ++void flush_thread(void) ++{ ++ struct task_struct *tsk = current; ++ ++ memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); ++ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); ++ clear_tsk_thread_flag(tsk, TIF_DEBUG); ++ /* ++ * Forget coprocessor state.. 
++ */ ++ clear_fpu(tsk); ++ clear_used_math(); ++} ++ ++void release_thread(struct task_struct *dead_task) ++{ ++ BUG_ON(dead_task->mm); ++ release_vm86_irqs(dead_task); ++} ++ ++/* ++ * This gets called before we allocate a new thread and copy ++ * the current task into it. ++ */ ++void prepare_to_copy(struct task_struct *tsk) ++{ ++ unlazy_fpu(tsk); ++} ++ ++int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, ++ unsigned long unused, ++ struct task_struct * p, struct pt_regs * regs) ++{ ++ struct pt_regs * childregs; ++ struct task_struct *tsk; ++ int err; ++ ++ childregs = task_pt_regs(p); ++ *childregs = *regs; ++ childregs->eax = 0; ++ childregs->esp = esp; ++ ++ p->thread.esp = (unsigned long) childregs; ++ p->thread.esp0 = (unsigned long) (childregs+1); ++ ++ p->thread.eip = (unsigned long) ret_from_fork; ++ ++ savesegment(fs,p->thread.fs); ++ savesegment(gs,p->thread.gs); ++ ++ tsk = current; ++ if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { ++ p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); ++ if (!p->thread.io_bitmap_ptr) { ++ p->thread.io_bitmap_max = 0; ++ return -ENOMEM; ++ } ++ memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr, ++ IO_BITMAP_BYTES); ++ set_tsk_thread_flag(p, TIF_IO_BITMAP); ++ } ++ ++ /* ++ * Set a new TLS for the child thread? 
++ */ ++ if (clone_flags & CLONE_SETTLS) { ++ struct desc_struct *desc; ++ struct user_desc info; ++ int idx; ++ ++ err = -EFAULT; ++ if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info))) ++ goto out; ++ err = -EINVAL; ++ if (LDT_empty(&info)) ++ goto out; ++ ++ idx = info.entry_number; ++ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) ++ goto out; ++ ++ desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; ++ desc->a = LDT_entry_a(&info); ++ desc->b = LDT_entry_b(&info); ++ } ++ ++ p->thread.iopl = current->thread.iopl; ++ ++ err = 0; ++ out: ++ if (err && p->thread.io_bitmap_ptr) { ++ kfree(p->thread.io_bitmap_ptr); ++ p->thread.io_bitmap_max = 0; ++ } ++ return err; ++} ++ ++/* ++ * fill in the user structure for a core dump.. ++ */ ++void dump_thread(struct pt_regs * regs, struct user * dump) ++{ ++ int i; ++ ++/* changed the size calculations - should hopefully work better. lbt */ ++ dump->magic = CMAGIC; ++ dump->start_code = 0; ++ dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); ++ dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; ++ dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; ++ dump->u_dsize -= dump->u_tsize; ++ dump->u_ssize = 0; ++ for (i = 0; i < 8; i++) ++ dump->u_debugreg[i] = current->thread.debugreg[i]; ++ ++ if (dump->start_stack < TASK_SIZE) ++ dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; ++ ++ dump->regs.ebx = regs->ebx; ++ dump->regs.ecx = regs->ecx; ++ dump->regs.edx = regs->edx; ++ dump->regs.esi = regs->esi; ++ dump->regs.edi = regs->edi; ++ dump->regs.ebp = regs->ebp; ++ dump->regs.eax = regs->eax; ++ dump->regs.ds = regs->xds; ++ dump->regs.es = regs->xes; ++ savesegment(fs,dump->regs.fs); ++ savesegment(gs,dump->regs.gs); ++ dump->regs.orig_eax = regs->orig_eax; ++ dump->regs.eip = regs->eip; ++ dump->regs.cs = regs->xcs; ++ dump->regs.eflags = regs->eflags; ++ dump->regs.esp = regs->esp; ++ dump->regs.ss = 
regs->xss; ++ ++ dump->u_fpvalid = dump_fpu (regs, &dump->i387); ++} ++EXPORT_SYMBOL(dump_thread); ++ ++/* ++ * Capture the user space registers if the task is not running (in user space) ++ */ ++int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) ++{ ++ struct pt_regs ptregs = *task_pt_regs(tsk); ++ ptregs.xcs &= 0xffff; ++ ptregs.xds &= 0xffff; ++ ptregs.xes &= 0xffff; ++ ptregs.xss &= 0xffff; ++ ++ elf_core_copy_regs(regs, &ptregs); ++ ++ return 1; ++} ++ ++static noinline void __switch_to_xtra(struct task_struct *next_p) ++{ ++ struct thread_struct *next; ++ ++ next = &next_p->thread; ++ ++ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { ++ set_debugreg(next->debugreg[0], 0); ++ set_debugreg(next->debugreg[1], 1); ++ set_debugreg(next->debugreg[2], 2); ++ set_debugreg(next->debugreg[3], 3); ++ /* no 4 and 5 */ ++ set_debugreg(next->debugreg[6], 6); ++ set_debugreg(next->debugreg[7], 7); ++ } ++} ++ ++/* ++ * This function selects if the context switch from prev to next ++ * has to tweak the TSC disable bit in the cr4. ++ */ ++static inline void disable_tsc(struct task_struct *prev_p, ++ struct task_struct *next_p) ++{ ++ struct thread_info *prev, *next; ++ ++ /* ++ * gcc should eliminate the ->thread_info dereference if ++ * has_secure_computing returns 0 at compile time (SECCOMP=n). ++ */ ++ prev = task_thread_info(prev_p); ++ next = task_thread_info(next_p); ++ ++ if (has_secure_computing(prev) || has_secure_computing(next)) { ++ /* slow path here */ ++ if (has_secure_computing(prev) && ++ !has_secure_computing(next)) { ++ write_cr4(read_cr4() & ~X86_CR4_TSD); ++ } else if (!has_secure_computing(prev) && ++ has_secure_computing(next)) ++ write_cr4(read_cr4() | X86_CR4_TSD); ++ } ++} ++ ++/* ++ * switch_to(x,yn) should switch tasks from x to y. ++ * ++ * We fsave/fwait so that an exception goes off at the right time ++ * (as a call from the fsave or fwait in effect) rather than to ++ * the wrong process. 
Lazy FP saving no longer makes any sense ++ * with modern CPU's, and this simplifies a lot of things (SMP ++ * and UP become the same). ++ * ++ * NOTE! We used to use the x86 hardware context switching. The ++ * reason for not using it any more becomes apparent when you ++ * try to recover gracefully from saved state that is no longer ++ * valid (stale segment register values in particular). With the ++ * hardware task-switch, there is no way to fix up bad state in ++ * a reasonable manner. ++ * ++ * The fact that Intel documents the hardware task-switching to ++ * be slow is a fairly red herring - this code is not noticeably ++ * faster. However, there _is_ some room for improvement here, ++ * so the performance issues may eventually be a valid point. ++ * More important, however, is the fact that this allows us much ++ * more flexibility. ++ * ++ * The return value (in %eax) will be the "prev" task after ++ * the task-switch, and shows up in ret_from_fork in entry.S, ++ * for example. ++ */ ++struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ++{ ++ struct thread_struct *prev = &prev_p->thread, ++ *next = &next_p->thread; ++ int cpu = smp_processor_id(); ++#ifndef CONFIG_X86_NO_TSS ++ struct tss_struct *tss = &per_cpu(init_tss, cpu); ++#endif ++#if CONFIG_XEN_COMPAT > 0x030002 ++ struct physdev_set_iopl iopl_op; ++ struct physdev_set_iobitmap iobmp_op; ++#else ++ struct physdev_op _pdo[2], *pdo = _pdo; ++#define iopl_op pdo->u.set_iopl ++#define iobmp_op pdo->u.set_iobitmap ++#endif ++ multicall_entry_t _mcl[8], *mcl = _mcl; ++ ++ /* XEN NOTE: FS/GS saved in switch_mm(), not here. */ ++ ++ /* ++ * This is basically '__unlazy_fpu', except that we queue a ++ * multicall to indicate FPU task switch, rather than ++ * synchronously trapping to Xen. 
++ */ ++ if (prev_p->thread_info->status & TS_USEDFPU) { ++ __save_init_fpu(prev_p); /* _not_ save_init_fpu() */ ++ mcl->op = __HYPERVISOR_fpu_taskswitch; ++ mcl->args[0] = 1; ++ mcl++; ++ } ++#if 0 /* lazy fpu sanity check */ ++ else BUG_ON(!(read_cr0() & 8)); ++#endif ++ ++ /* ++ * Reload esp0. ++ * This is load_esp0(tss, next) with a multicall. ++ */ ++ mcl->op = __HYPERVISOR_stack_switch; ++ mcl->args[0] = __KERNEL_DS; ++ mcl->args[1] = next->esp0; ++ mcl++; ++ ++ /* ++ * Load the per-thread Thread-Local Storage descriptor. ++ * This is load_TLS(next, cpu) with multicalls. ++ */ ++#define C(i) do { \ ++ if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \ ++ next->tls_array[i].b != prev->tls_array[i].b)) { \ ++ mcl->op = __HYPERVISOR_update_descriptor; \ ++ *(u64 *)&mcl->args[0] = virt_to_machine( \ ++ &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\ ++ *(u64 *)&mcl->args[2] = *(u64 *)&next->tls_array[i]; \ ++ mcl++; \ ++ } \ ++} while (0) ++ C(0); C(1); C(2); ++#undef C ++ ++ if (unlikely(prev->iopl != next->iopl)) { ++ iopl_op.iopl = (next->iopl == 0) ? 1 : (next->iopl >> 12) & 3; ++#if CONFIG_XEN_COMPAT > 0x030002 ++ mcl->op = __HYPERVISOR_physdev_op; ++ mcl->args[0] = PHYSDEVOP_set_iopl; ++ mcl->args[1] = (unsigned long)&iopl_op; ++#else ++ mcl->op = __HYPERVISOR_physdev_op_compat; ++ pdo->cmd = PHYSDEVOP_set_iopl; ++ mcl->args[0] = (unsigned long)pdo++; ++#endif ++ mcl++; ++ } ++ ++ if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) { ++ set_xen_guest_handle(iobmp_op.bitmap, ++ (char *)next->io_bitmap_ptr); ++ iobmp_op.nr_ports = next->io_bitmap_ptr ? 
IO_BITMAP_BITS : 0;
++#if CONFIG_XEN_COMPAT > 0x030002
++		mcl->op = __HYPERVISOR_physdev_op;
++		mcl->args[0] = PHYSDEVOP_set_iobitmap;
++		mcl->args[1] = (unsigned long)&iobmp_op;
++#else
++		mcl->op = __HYPERVISOR_physdev_op_compat;
++		pdo->cmd = PHYSDEVOP_set_iobitmap;
++		mcl->args[0] = (unsigned long)pdo++;
++#endif
++		mcl++;
++	}
++
++#if CONFIG_XEN_COMPAT <= 0x030002
++	BUG_ON(pdo > _pdo + ARRAY_SIZE(_pdo));
++#endif
++	BUG_ON(mcl > _mcl + ARRAY_SIZE(_mcl));
++	if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
++		BUG();
++
++	/*
++	 * Restore %fs and %gs if needed.
++	 *
++	 * Glibc normally makes %fs be zero, and %gs is one of
++	 * the TLS segments.
++	 */
++	if (unlikely(next->fs))
++		loadsegment(fs, next->fs);
++
++	if (next->gs)
++		loadsegment(gs, next->gs);
++
++	/*
++	 * Now maybe handle debug registers
++	 */
++	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
++		__switch_to_xtra(next_p);
++
++	disable_tsc(prev_p, next_p);
++
++	return prev_p;
++}
++
++asmlinkage int sys_fork(struct pt_regs regs)
++{
++	return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
++}
++
++asmlinkage int sys_clone(struct pt_regs regs)
++{
++	unsigned long clone_flags;
++	unsigned long newsp;
++	int __user *parent_tidptr, *child_tidptr;
++
++	clone_flags = regs.ebx;
++	newsp = regs.ecx;
++	parent_tidptr = (int __user *)regs.edx;
++	child_tidptr = (int __user *)regs.edi;
++	if (!newsp)
++		newsp = regs.esp;
++	return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
++}
++
++/*
++ * This is trivial, and on the face of it looks like it
++ * could equally well be done in user mode.
++ *
++ * Not so, for quite unobvious reasons - register pressure.
++ * In user mode vfork() cannot have a stack frame, and if
++ * done by calling the "clone()" system call directly, you
++ * do not have enough call-clobbered registers to hold all
++ * the information you need.
++ */
++asmlinkage int sys_vfork(struct pt_regs regs)
++{
++	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
++}
++
++/*
++ * sys_execve() executes a new program.
++ */
++asmlinkage int sys_execve(struct pt_regs regs)
++{
++	int error;
++	char * filename;
++
++	filename = getname((char __user *) regs.ebx);
++	error = PTR_ERR(filename);
++	if (IS_ERR(filename))
++		goto out;
++	error = do_execve(filename,
++			(char __user * __user *) regs.ecx,
++			(char __user * __user *) regs.edx,
++			&regs);
++	if (error == 0) {
++		task_lock(current);
++		current->ptrace &= ~PT_DTRACE;
++		task_unlock(current);
++		/* Make sure we don't return using sysenter.. */
++		set_thread_flag(TIF_IRET);
++	}
++	putname(filename);
++out:
++	return error;
++}
++
++#define top_esp (THREAD_SIZE - sizeof(unsigned long))
++#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long))
++
++unsigned long get_wchan(struct task_struct *p)
++{
++	unsigned long ebp, esp, eip;
++	unsigned long stack_page;
++	int count = 0;
++	if (!p || p == current || p->state == TASK_RUNNING)
++		return 0;
++	stack_page = (unsigned long)task_stack_page(p);
++	esp = p->thread.esp;
++	if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
++		return 0;
++	/* include/asm-i386/system.h:switch_to() pushes ebp last. */
++	ebp = *(unsigned long *) esp;
++	do {
++		if (ebp < stack_page || ebp > top_ebp+stack_page)
++			return 0;
++		eip = *(unsigned long *) (ebp+4);
++		if (!in_sched_functions(eip))
++			return eip;
++		ebp = *(unsigned long *) ebp;
++	} while (count++ < 16);
++	return 0;
++}
++
++/*
++ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
++ */
++static int get_free_idx(void)
++{
++	struct thread_struct *t = &current->thread;
++	int idx;
++
++	for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
++		if (desc_empty(t->tls_array + idx))
++			return idx + GDT_ENTRY_TLS_MIN;
++	return -ESRCH;
++}
++
++/*
++ * Set a given TLS descriptor:
++ */
++asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
++{
++	struct thread_struct *t = &current->thread;
++	struct user_desc info;
++	struct desc_struct *desc;
++	int cpu, idx;
++
++	if (copy_from_user(&info, u_info, sizeof(info)))
++		return -EFAULT;
++	idx = info.entry_number;
++
++	/*
++	 * index -1 means the kernel should try to find and
++	 * allocate an empty descriptor:
++	 */
++	if (idx == -1) {
++		idx = get_free_idx();
++		if (idx < 0)
++			return idx;
++		if (put_user(idx, &u_info->entry_number))
++			return -EFAULT;
++	}
++
++	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
++		return -EINVAL;
++
++	desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
++
++	/*
++	 * We must not get preempted while modifying the TLS.
++ */ ++ cpu = get_cpu(); ++ ++ if (LDT_empty(&info)) { ++ desc->a = 0; ++ desc->b = 0; ++ } else { ++ desc->a = LDT_entry_a(&info); ++ desc->b = LDT_entry_b(&info); ++ } ++ load_TLS(t, cpu); ++ ++ put_cpu(); ++ ++ return 0; ++} ++ ++/* ++ * Get the current Thread-Local Storage area: ++ */ ++ ++#define GET_BASE(desc) ( \ ++ (((desc)->a >> 16) & 0x0000ffff) | \ ++ (((desc)->b << 16) & 0x00ff0000) | \ ++ ( (desc)->b & 0xff000000) ) ++ ++#define GET_LIMIT(desc) ( \ ++ ((desc)->a & 0x0ffff) | \ ++ ((desc)->b & 0xf0000) ) ++ ++#define GET_32BIT(desc) (((desc)->b >> 22) & 1) ++#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) ++#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) ++#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) ++#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) ++#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) ++ ++asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) ++{ ++ struct user_desc info; ++ struct desc_struct *desc; ++ int idx; ++ ++ if (get_user(idx, &u_info->entry_number)) ++ return -EFAULT; ++ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) ++ return -EINVAL; ++ ++ memset(&info, 0, sizeof(info)); ++ ++ desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; ++ ++ info.entry_number = idx; ++ info.base_addr = GET_BASE(desc); ++ info.limit = GET_LIMIT(desc); ++ info.seg_32bit = GET_32BIT(desc); ++ info.contents = GET_CONTENTS(desc); ++ info.read_exec_only = !GET_WRITABLE(desc); ++ info.limit_in_pages = GET_LIMIT_PAGES(desc); ++ info.seg_not_present = !GET_PRESENT(desc); ++ info.useable = GET_USEABLE(desc); ++ ++ if (copy_to_user(u_info, &info, sizeof(info))) ++ return -EFAULT; ++ return 0; ++} ++ ++unsigned long arch_align_stack(unsigned long sp) ++{ ++ if (randomize_va_space) ++ sp -= get_random_int() % 8192; ++ return sp & ~0xf; ++} +Index: head-2008-11-25/arch/x86/kernel/quirks-xen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ 
head-2008-11-25/arch/x86/kernel/quirks-xen.c	2008-01-28 12:24:19.000000000 +0100
+@@ -0,0 +1,47 @@
++/*
++ * This file contains work-arounds for x86 and x86_64 platform bugs.
++ */
++#include <linux/pci.h>
++#include <linux/irq.h>
++
++#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI)
++
++static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
++{
++	u8 config, rev;
++	u32 word;
++
++	/* BIOS may enable hardware IRQ balancing for
++	 * E7520/E7320/E7525(revision ID 0x9 and below)
++	 * based platforms.
++	 * Disable SW irqbalance/affinity on those platforms.
++	 */
++	pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
++	if (rev > 0x9)
++		return;
++
++	printk(KERN_INFO "Intel E7520/7320/7525 detected.");
++
++	/* enable access to config space*/
++	pci_read_config_byte(dev, 0xf4, &config);
++	pci_write_config_byte(dev, 0xf4, config|0x2);
++
++	/* read xTPR register */
++	raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
++
++	if (!(word & (1 << 13))) {
++		struct xen_platform_op op;
++		printk(KERN_INFO "Disabling irq balancing and affinity\n");
++		op.cmd = XENPF_platform_quirk;
++		op.u.platform_quirk.quirk_id = QUIRK_NOIRQBALANCING;
++		WARN_ON(HYPERVISOR_platform_op(&op));
++	}
++
++	/* put back the original value for config space*/
++	if (!(config & 0x2))
++		pci_write_config_byte(dev, 0xf4, config);
++}
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
++#endif
+Index: head-2008-11-25/arch/x86/kernel/setup_32-xen.c
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ head-2008-11-25/arch/x86/kernel/setup_32-xen.c	2008-04-22 15:41:51.000000000 +0200
+@@ -0,0 +1,1919 @@
++/*
++ * linux/arch/i386/kernel/setup.c
++
* ++ * Copyright (C) 1995 Linus Torvalds ++ * ++ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 ++ * ++ * Memory region support ++ * David Parsons , July-August 1999 ++ * ++ * Added E820 sanitization routine (removes overlapping memory regions); ++ * Brian Moyle , February 2001 ++ * ++ * Moved CPU detection code to cpu/${cpu}.c ++ * Patrick Mochel , March 2002 ++ * ++ * Provisions for empty E820 memory regions (reported by certain BIOSes). ++ * Alex Achenbach , December 2002. ++ * ++ */ ++ ++/* ++ * This file handles the architecture-dependent parts of initialization ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include